From 2028ebe120aab22bfd0b2baf8902d4c9627eb33f Mon Sep 17 00:00:00 2001 From: akerr Date: Wed, 16 May 2018 11:44:56 -0700 Subject: [PATCH] CUTLASS v1.0 release --- CMake/bin2hex.cmake | 26 + CMakeLists.txt | 182 + Doxyfile | 15 +- README.md | 253 +- clang-format.sh | 17 + common.mk | 181 - cutlass/convert.h | 102 + cutlass/coord.h | 287 + cutlass/core_io.h | 44 + cutlass/cutlass.h | 73 + cutlass/fragment.h | 278 + cutlass/fragment_load_store.h | 135 + cutlass/fragment_multiply_add.h | 131 + cutlass/gemm/block_loader.h | 162 - cutlass/gemm/block_loader_congruous_dp1.h | 406 - cutlass/gemm/block_loader_congruous_idp4.h | 544 -- cutlass/gemm/block_loader_crosswise.h | 411 - cutlass/gemm/block_loader_wmma.h | 322 - cutlass/gemm/block_task.h | 677 -- cutlass/gemm/block_task_wmma.h | 767 -- cutlass/gemm/clear_accumulators.h | 55 + cutlass/gemm/dgemm_traits.h | 127 + cutlass/gemm/dispatch.h | 542 -- cutlass/gemm/dispatch_policies.h | 661 -- cutlass/gemm/dp_accummulate.h | 223 - cutlass/gemm/epilogue_function.h | 104 - cutlass/gemm/gemm.h | 319 + cutlass/gemm/gemm_epilogue.h | 225 + cutlass/gemm/gemm_epilogue_traits.h | 331 + cutlass/gemm/gemm_global_stream.h | 175 + cutlass/gemm/gemm_global_tile.h | 478 + cutlass/gemm/gemm_operand.h | 141 + cutlass/gemm/gemm_shared_stream.h | 113 + cutlass/gemm/gemm_shared_tile.h | 406 + cutlass/gemm/gemm_traits.h | 747 ++ cutlass/gemm/grid_raster.h | 436 - cutlass/gemm/hgemm_global_tile.h | 90 + cutlass/gemm/hgemm_multiply_add.h | 104 + cutlass/gemm/hgemm_swizzle.h | 94 + cutlass/gemm/hgemm_traits.h | 391 + cutlass/gemm/identity_block_swizzle.h | 48 + cutlass/gemm/igemm_epilogue.h | 320 + cutlass/gemm/igemm_global_tile.h | 95 + cutlass/gemm/igemm_multiply_add.h | 89 + cutlass/gemm/igemm_swizzle.h | 115 + cutlass/gemm/igemm_traits.h | 393 + cutlass/gemm/k_split_control.h | 310 - cutlass/gemm/linear_scaling.h | 86 + cutlass/gemm/sgemm_traits.h | 127 + cutlass/gemm/thread_accumulator.h | 469 - cutlass/gemm/thread_multiply_add.h | 84 + 
cutlass/gemm/wmma_accumulator.h | 215 - cutlass/gemm/wmma_gemm_epilogue_traits.h | 161 + cutlass/gemm/wmma_gemm_global_tile.h | 203 + cutlass/gemm/wmma_gemm_multiply_add.h | 108 + cutlass/gemm/wmma_gemm_shared_tile.h | 240 + cutlass/gemm/wmma_gemm_traits.h | 574 ++ cutlass/iterator_access.h | 325 + cutlass/load_store.h | 199 + cutlass/matrix_traits.h | 48 + cutlass/predicate_vector.h | 493 ++ cutlass/reshape_tile.h | 58 + cutlass/shape.h | 301 + cutlass/tensor_ref.h | 151 + cutlass/tensor_view.h | 172 + cutlass/tile_iterator.h | 881 ++ cutlass/tile_traits_standard.h | 238 + cutlass/util/cutlass_math.h | 131 + cutlass/util/debug.h | 136 +- cutlass/util/device_introspection.h | 224 - cutlass/util/io_intrinsics.h | 492 -- cutlass/util/math.h | 167 - cutlass/util/matrix_transform.h | 102 - cutlass/util/nv_std.h | 705 -- cutlass/util/platform.h | 801 ++ cutlass/util/printable.h | 72 - cutlass/util/util.h | 82 - cutlass/vector.h | 229 + cutlass/wmma_matrix.h | 193 + cutlass_test/.gitignore | 7 - cutlass_test/Makefile | 180 - cutlass_test/cublas_dispatch.h | 300 - cutlass_test/cutlass_dispatch.h | 261 - cutlass_test/gemm.cu | 572 -- cutlass_test/util/command_line.h | 320 - cutlass_test/util/exceptions.h | 91 - cutlass_test/util/half.h | 231 - cutlass_test/util/matrix.h | 503 -- cutlass_test/util/timer.h | 107 - cutlass_test/util/type_conversion.h | 163 - docs/generated-html/annotated.html | 378 + docs/generated-html/bc_s.png | Bin 0 -> 682 bytes docs/generated-html/bdwn.png | Bin 0 -> 147 bytes ...dicateVector_1_1ConstIterator-members.html | 99 + ...s_1_1PredicateVector_1_1ConstIterator.html | 389 + ..._1PredicateVector_1_1Iterator-members.html | 101 + ...utlass_1_1PredicateVector_1_1Iterator.html | 451 + .../classcutlass_1_1TensorRef-members.html | 109 + .../classcutlass_1_1TensorRef.html | 704 ++ .../classcutlass_1_1TensorView-members.html | 125 + .../classcutlass_1_1TensorView.html | 915 ++ .../classcutlass_1_1TensorView.png | Bin 0 -> 690 bytes 
...ss_1_1platform_1_1unique__ptr-members.html | 106 + ...asscutlass_1_1platform_1_1unique__ptr.html | 554 ++ docs/generated-html/classes.html | 173 + .../classnv__std_1_1unique__ptr-members.html | 106 + .../classnv__std_1_1unique__ptr.html | 554 ++ .../clear__accumulators_8h.html | 112 + .../clear__accumulators_8h_source.html | 93 + docs/generated-html/closed.png | Bin 0 -> 133 bytes docs/generated-html/convert_8h.html | 111 + docs/generated-html/convert_8h_source.html | 102 + docs/generated-html/coord_8h.html | 139 + docs/generated-html/coord_8h_source.html | 127 + docs/generated-html/core__io_8h.html | 135 + docs/generated-html/core__io_8h_source.html | 88 + docs/generated-html/cutlass_8h.html | 237 + docs/generated-html/cutlass_8h_source.html | 88 + docs/generated-html/cutlass__math_8h.html | 132 + .../cutlass__math_8h_source.html | 104 + docs/generated-html/debug_8h.html | 239 + docs/generated-html/debug_8h_source.html | 89 + docs/generated-html/dgemm__traits_8h.html | 117 + .../dgemm__traits_8h_source.html | 103 + .../dir_1417ee5ebebc309c36b7962f26a92c39.html | 155 + .../dir_18d6a367a3982a494d65599933fc67a3.html | 178 + .../dir_c5917a9a879e9a6c73eaf5237444ab84.html | 100 + docs/generated-html/doc.png | Bin 0 -> 751 bytes docs/generated-html/doxygen.css | 1596 ++++ docs/generated-html/doxygen.png | Bin 0 -> 3888 bytes docs/generated-html/dynsections.js | 120 + docs/generated-html/files.html | 138 + docs/generated-html/folderclosed.png | Bin 0 -> 649 bytes docs/generated-html/folderopen.png | Bin 0 -> 644 bytes docs/generated-html/fragment_8h.html | 124 + docs/generated-html/fragment_8h_source.html | 141 + .../fragment__load__store_8h.html | 118 + .../fragment__load__store_8h_source.html | 106 + .../fragment__multiply__add_8h.html | 111 + .../fragment__multiply__add_8h_source.html | 105 + docs/generated-html/fragment__stream_8h.html | 118 + .../fragment__stream_8h_source.html | 135 + docs/generated-html/functions.html | 149 + 
docs/generated-html/functions_0x7e.html | 86 + docs/generated-html/functions_b.html | 122 + docs/generated-html/functions_c.html | 154 + docs/generated-html/functions_d.html | 133 + docs/generated-html/functions_e.html | 114 + docs/generated-html/functions_enum.html | 89 + docs/generated-html/functions_eval.html | 172 + docs/generated-html/functions_f.html | 168 + docs/generated-html/functions_func.html | 97 + docs/generated-html/functions_func_0x7e.html | 86 + docs/generated-html/functions_func_b.html | 86 + docs/generated-html/functions_func_c.html | 141 + docs/generated-html/functions_func_d.html | 95 + docs/generated-html/functions_func_e.html | 95 + docs/generated-html/functions_func_f.html | 102 + docs/generated-html/functions_func_g.html | 122 + docs/generated-html/functions_func_h.html | 86 + docs/generated-html/functions_func_i.html | 163 + docs/generated-html/functions_func_l.html | 105 + docs/generated-html/functions_func_m.html | 95 + docs/generated-html/functions_func_o.html | 193 + docs/generated-html/functions_func_p.html | 95 + docs/generated-html/functions_func_r.html | 99 + docs/generated-html/functions_func_s.html | 129 + docs/generated-html/functions_func_t.html | 114 + docs/generated-html/functions_func_u.html | 86 + docs/generated-html/functions_func_v.html | 91 + docs/generated-html/functions_func_w.html | 86 + docs/generated-html/functions_g.html | 231 + docs/generated-html/functions_h.html | 86 + docs/generated-html/functions_i.html | 268 + docs/generated-html/functions_k.html | 376 + docs/generated-html/functions_l.html | 126 + docs/generated-html/functions_m.html | 121 + docs/generated-html/functions_n.html | 94 + docs/generated-html/functions_o.html | 213 + docs/generated-html/functions_p.html | 164 + docs/generated-html/functions_r.html | 107 + docs/generated-html/functions_s.html | 383 + docs/generated-html/functions_t.html | 280 + docs/generated-html/functions_type.html | 123 + docs/generated-html/functions_type_b.html | 111 + 
docs/generated-html/functions_type_c.html | 94 + docs/generated-html/functions_type_d.html | 109 + docs/generated-html/functions_type_e.html | 96 + docs/generated-html/functions_type_f.html | 141 + docs/generated-html/functions_type_g.html | 183 + docs/generated-html/functions_type_i.html | 160 + docs/generated-html/functions_type_l.html | 86 + docs/generated-html/functions_type_m.html | 98 + docs/generated-html/functions_type_n.html | 86 + docs/generated-html/functions_type_o.html | 103 + docs/generated-html/functions_type_p.html | 112 + docs/generated-html/functions_type_s.html | 278 + docs/generated-html/functions_type_t.html | 227 + docs/generated-html/functions_type_v.html | 91 + docs/generated-html/functions_type_w.html | 90 + docs/generated-html/functions_type_y.html | 86 + docs/generated-html/functions_u.html | 86 + docs/generated-html/functions_v.html | 124 + docs/generated-html/functions_vars.html | 92 + docs/generated-html/functions_vars_b.html | 91 + docs/generated-html/functions_vars_c.html | 89 + docs/generated-html/functions_vars_d.html | 95 + docs/generated-html/functions_vars_e.html | 87 + docs/generated-html/functions_vars_f.html | 95 + docs/generated-html/functions_vars_g.html | 92 + docs/generated-html/functions_vars_i.html | 117 + docs/generated-html/functions_vars_k.html | 327 + docs/generated-html/functions_vars_l.html | 103 + docs/generated-html/functions_vars_m.html | 91 + docs/generated-html/functions_vars_n.html | 91 + docs/generated-html/functions_vars_p.html | 117 + docs/generated-html/functions_vars_r.html | 91 + docs/generated-html/functions_vars_s.html | 144 + docs/generated-html/functions_vars_t.html | 103 + docs/generated-html/functions_vars_v.html | 90 + docs/generated-html/functions_w.html | 93 + docs/generated-html/functions_y.html | 86 + docs/generated-html/gemm_8h.html | 123 + docs/generated-html/gemm_8h_source.html | 130 + docs/generated-html/gemm__epilogue_8h.html | 120 + .../gemm__epilogue_8h_source.html | 130 + 
.../gemm__epilogue__traits_8h.html | 128 + .../gemm__epilogue__traits_8h_source.html | 160 + .../gemm__fragment__stream_8h.html | 119 + .../gemm__fragment__stream_8h_source.html | 148 + .../gemm__global__stream_8h.html | 119 + .../gemm__global__stream_8h_source.html | 130 + .../generated-html/gemm__global__tile_8h.html | 136 + .../gemm__global__tile_8h_source.html | 215 + docs/generated-html/gemm__operand_8h.html | 134 + .../gemm__operand_8h_source.html | 117 + .../gemm__shared__stream_8h.html | 112 + .../gemm__shared__stream_8h_source.html | 112 + .../generated-html/gemm__shared__tile_8h.html | 135 + .../gemm__shared__tile_8h_source.html | 214 + docs/generated-html/gemm__traits_8h.html | 151 + .../gemm__traits_8h_source.html | 252 + docs/generated-html/globals.html | 147 + docs/generated-html/globals_defs.html | 144 + docs/generated-html/globals_func.html | 84 + .../group__fragment__concept.html | 102 + .../group__fragment__iterator__concept.html | 99 + .../group__layout__concept.html | 108 + .../group__predicate__iterator__concept.html | 106 + .../group__predicate__tile__adapter.html | 88 + .../group__predicate__vector__concept.html | 100 + .../group__tile__load__iterator__concept.html | 104 + ...group__tile__store__iterator__concept.html | 104 + .../group__tile__traits__concept.html | 101 + .../hgemm__global__tile_8h.html | 115 + .../hgemm__global__tile_8h_source.html | 111 + .../hgemm__multiply__add_8h.html | 111 + .../hgemm__multiply__add_8h_source.html | 107 + docs/generated-html/hgemm__swizzle_8h.html | 110 + .../hgemm__swizzle_8h_source.html | 100 + docs/generated-html/hgemm__traits_8h.html | 143 + .../hgemm__traits_8h_source.html | 166 + docs/generated-html/hierarchy.html | 411 + .../identity__block__swizzle_8h.html | 110 + .../identity__block__swizzle_8h_source.html | 91 + docs/generated-html/igemm__epilogue_8h.html | 135 + .../igemm__epilogue_8h_source.html | 168 + .../igemm__global__tile_8h.html | 116 + .../igemm__global__tile_8h_source.html | 110 + 
.../igemm__multiply__add_8h.html | 111 + .../igemm__multiply__add_8h_source.html | 106 + docs/generated-html/igemm__swizzle_8h.html | 109 + .../igemm__swizzle_8h_source.html | 100 + docs/generated-html/igemm__traits_8h.html | 150 + .../igemm__traits_8h_source.html | 166 + docs/generated-html/index.html | 83 + docs/generated-html/iterator__access_8h.html | 175 + .../iterator__access_8h_source.html | 107 + docs/generated-html/jquery.js | 115 + docs/generated-html/linear__scaling_8h.html | 113 + .../linear__scaling_8h_source.html | 102 + docs/generated-html/load__store_8h.html | 128 + .../generated-html/load__store_8h_source.html | 118 + docs/generated-html/matrix__traits_8h.html | 110 + .../matrix__traits_8h_source.html | 98 + docs/generated-html/menu.js | 50 + docs/generated-html/menudata.js | 151 + docs/generated-html/modules.html | 96 + docs/generated-html/namespacecutlass.html | 1557 ++++ .../namespacecutlass_1_1gemm.html | 371 + .../namespacecutlass_1_1platform.html | 938 ++ docs/generated-html/namespacemembers.html | 214 + .../generated-html/namespacemembers_func.html | 200 + .../generated-html/namespacemembers_type.html | 87 + docs/generated-html/namespacenv__std.html | 934 ++ docs/generated-html/namespaces.html | 90 + docs/generated-html/nav_f.png | Bin 0 -> 154 bytes docs/generated-html/nav_g.png | Bin 0 -> 95 bytes docs/generated-html/nav_h.png | Bin 0 -> 97 bytes docs/generated-html/nv__std_8h.html | 630 ++ docs/generated-html/nv__std_8h_source.html | 173 + docs/generated-html/open.png | Bin 0 -> 123 bytes docs/generated-html/platform_8h.html | 632 ++ docs/generated-html/platform_8h_source.html | 173 + docs/generated-html/predicate__vector_8h.html | 129 + .../predicate__vector_8h_source.html | 155 + docs/generated-html/reshape__tile_8h.html | 109 + .../reshape__tile_8h_source.html | 93 + docs/generated-html/search/all_0.html | 30 + docs/generated-html/search/all_0.js | 8 + docs/generated-html/search/all_1.html | 30 + docs/generated-html/search/all_1.js | 
31 + docs/generated-html/search/all_10.html | 30 + docs/generated-html/search/all_10.js | 20 + docs/generated-html/search/all_11.html | 30 + docs/generated-html/search/all_11.js | 89 + docs/generated-html/search/all_12.html | 30 + docs/generated-html/search/all_12.js | 54 + docs/generated-html/search/all_13.html | 30 + docs/generated-html/search/all_13.js | 4 + docs/generated-html/search/all_14.html | 30 + docs/generated-html/search/all_14.js | 15 + docs/generated-html/search/all_15.html | 30 + docs/generated-html/search/all_15.js | 12 + docs/generated-html/search/all_16.html | 30 + docs/generated-html/search/all_16.js | 4 + docs/generated-html/search/all_17.html | 30 + docs/generated-html/search/all_17.js | 4 + docs/generated-html/search/all_2.html | 30 + docs/generated-html/search/all_2.js | 10 + docs/generated-html/search/all_3.html | 30 + docs/generated-html/search/all_3.js | 59 + docs/generated-html/search/all_4.html | 30 + docs/generated-html/search/all_4.js | 19 + docs/generated-html/search/all_5.html | 30 + docs/generated-html/search/all_5.js | 14 + docs/generated-html/search/all_6.html | 30 + docs/generated-html/search/all_6.js | 32 + docs/generated-html/search/all_7.html | 30 + docs/generated-html/search/all_7.js | 85 + docs/generated-html/search/all_8.html | 30 + docs/generated-html/search/all_8.js | 22 + docs/generated-html/search/all_9.html | 30 + docs/generated-html/search/all_9.js | 107 + docs/generated-html/search/all_a.html | 30 + docs/generated-html/search/all_a.js | 76 + docs/generated-html/search/all_b.html | 30 + docs/generated-html/search/all_b.js | 27 + docs/generated-html/search/all_c.html | 30 + docs/generated-html/search/all_c.js | 20 + docs/generated-html/search/all_d.html | 30 + docs/generated-html/search/all_d.js | 8 + docs/generated-html/search/all_e.html | 30 + docs/generated-html/search/all_e.js | 34 + docs/generated-html/search/all_f.html | 30 + docs/generated-html/search/all_f.js | 26 + docs/generated-html/search/classes_0.html | 
30 + docs/generated-html/search/classes_0.js | 22 + docs/generated-html/search/classes_1.html | 30 + docs/generated-html/search/classes_1.js | 4 + docs/generated-html/search/classes_10.html | 30 + docs/generated-html/search/classes_10.js | 4 + docs/generated-html/search/classes_11.html | 30 + docs/generated-html/search/classes_11.js | 10 + docs/generated-html/search/classes_12.html | 30 + docs/generated-html/search/classes_12.js | 5 + docs/generated-html/search/classes_2.html | 30 + docs/generated-html/search/classes_2.js | 23 + docs/generated-html/search/classes_3.html | 30 + docs/generated-html/search/classes_3.js | 9 + docs/generated-html/search/classes_4.html | 30 + docs/generated-html/search/classes_4.js | 8 + docs/generated-html/search/classes_5.html | 30 + docs/generated-html/search/classes_5.js | 14 + docs/generated-html/search/classes_6.html | 30 + docs/generated-html/search/classes_6.js | 49 + docs/generated-html/search/classes_7.html | 30 + docs/generated-html/search/classes_7.js | 18 + docs/generated-html/search/classes_8.html | 30 + docs/generated-html/search/classes_8.js | 77 + docs/generated-html/search/classes_9.html | 30 + docs/generated-html/search/classes_9.js | 14 + docs/generated-html/search/classes_a.html | 30 + docs/generated-html/search/classes_a.js | 6 + docs/generated-html/search/classes_b.html | 30 + docs/generated-html/search/classes_b.js | 4 + docs/generated-html/search/classes_c.html | 30 + docs/generated-html/search/classes_c.js | 15 + docs/generated-html/search/classes_d.html | 30 + docs/generated-html/search/classes_d.js | 12 + docs/generated-html/search/classes_e.html | 30 + docs/generated-html/search/classes_e.js | 34 + docs/generated-html/search/classes_f.html | 30 + docs/generated-html/search/classes_f.js | 24 + docs/generated-html/search/close.png | Bin 0 -> 273 bytes docs/generated-html/search/defines_0.html | 30 + docs/generated-html/search/defines_0.js | 7 + docs/generated-html/search/defines_1.html | 30 + 
docs/generated-html/search/defines_1.js | 17 + docs/generated-html/search/defines_2.html | 30 + docs/generated-html/search/defines_2.js | 5 + docs/generated-html/search/defines_3.html | 30 + docs/generated-html/search/defines_3.js | 4 + docs/generated-html/search/enums_0.html | 30 + docs/generated-html/search/enums_0.js | 4 + docs/generated-html/search/enumvalues_0.html | 30 + docs/generated-html/search/enumvalues_0.js | 4 + docs/generated-html/search/enumvalues_1.html | 30 + docs/generated-html/search/enumvalues_1.js | 19 + docs/generated-html/search/enumvalues_2.html | 30 + docs/generated-html/search/enumvalues_2.js | 4 + docs/generated-html/search/enumvalues_3.html | 30 + docs/generated-html/search/enumvalues_3.js | 4 + docs/generated-html/search/files_0.html | 30 + docs/generated-html/search/files_0.js | 9 + docs/generated-html/search/files_1.html | 30 + docs/generated-html/search/files_1.js | 5 + docs/generated-html/search/files_2.html | 30 + docs/generated-html/search/files_2.js | 6 + docs/generated-html/search/files_3.html | 30 + docs/generated-html/search/files_3.js | 12 + docs/generated-html/search/files_4.html | 30 + docs/generated-html/search/files_4.js | 7 + docs/generated-html/search/files_5.html | 30 + docs/generated-html/search/files_5.js | 10 + docs/generated-html/search/files_6.html | 30 + docs/generated-html/search/files_6.js | 5 + docs/generated-html/search/files_7.html | 30 + docs/generated-html/search/files_7.js | 4 + docs/generated-html/search/files_8.html | 30 + docs/generated-html/search/files_8.js | 5 + docs/generated-html/search/files_9.html | 30 + docs/generated-html/search/files_9.js | 4 + docs/generated-html/search/files_a.html | 30 + docs/generated-html/search/files_a.js | 5 + docs/generated-html/search/files_b.html | 30 + docs/generated-html/search/files_b.js | 8 + docs/generated-html/search/files_c.html | 30 + docs/generated-html/search/files_c.js | 4 + docs/generated-html/search/files_d.html | 30 + 
docs/generated-html/search/files_d.js | 9 + docs/generated-html/search/files_e.html | 30 + docs/generated-html/search/files_e.js | 9 + docs/generated-html/search/functions_0.html | 30 + docs/generated-html/search/functions_0.js | 4 + docs/generated-html/search/functions_1.html | 30 + docs/generated-html/search/functions_1.js | 5 + docs/generated-html/search/functions_10.html | 30 + docs/generated-html/search/functions_10.js | 11 + docs/generated-html/search/functions_11.html | 30 + docs/generated-html/search/functions_11.js | 4 + docs/generated-html/search/functions_12.html | 30 + docs/generated-html/search/functions_12.js | 4 + docs/generated-html/search/functions_13.html | 30 + docs/generated-html/search/functions_13.js | 4 + docs/generated-html/search/functions_14.html | 30 + docs/generated-html/search/functions_14.js | 4 + docs/generated-html/search/functions_2.html | 30 + docs/generated-html/search/functions_2.js | 4 + docs/generated-html/search/functions_3.html | 30 + docs/generated-html/search/functions_3.js | 19 + docs/generated-html/search/functions_4.html | 30 + docs/generated-html/search/functions_4.js | 5 + docs/generated-html/search/functions_5.html | 30 + docs/generated-html/search/functions_5.js | 7 + docs/generated-html/search/functions_6.html | 30 + docs/generated-html/search/functions_6.js | 9 + docs/generated-html/search/functions_7.html | 30 + docs/generated-html/search/functions_7.js | 17 + docs/generated-html/search/functions_8.html | 30 + docs/generated-html/search/functions_8.js | 4 + docs/generated-html/search/functions_9.html | 30 + docs/generated-html/search/functions_9.js | 22 + docs/generated-html/search/functions_a.html | 30 + docs/generated-html/search/functions_a.js | 9 + docs/generated-html/search/functions_b.html | 30 + docs/generated-html/search/functions_b.js | 10 + docs/generated-html/search/functions_c.html | 30 + docs/generated-html/search/functions_c.js | 31 + docs/generated-html/search/functions_d.html | 30 + 
docs/generated-html/search/functions_d.js | 6 + docs/generated-html/search/functions_e.html | 30 + docs/generated-html/search/functions_e.js | 8 + docs/generated-html/search/functions_f.html | 30 + docs/generated-html/search/functions_f.js | 16 + docs/generated-html/search/groups_0.html | 30 + docs/generated-html/search/groups_0.js | 5 + docs/generated-html/search/groups_1.html | 30 + docs/generated-html/search/groups_1.js | 4 + docs/generated-html/search/groups_2.html | 30 + docs/generated-html/search/groups_2.js | 6 + docs/generated-html/search/groups_3.html | 30 + docs/generated-html/search/groups_3.js | 6 + docs/generated-html/search/mag_sel.png | Bin 0 -> 563 bytes docs/generated-html/search/namespaces_0.html | 30 + docs/generated-html/search/namespaces_0.js | 6 + docs/generated-html/search/namespaces_1.html | 30 + docs/generated-html/search/namespaces_1.js | 4 + docs/generated-html/search/nomatches.html | 12 + docs/generated-html/search/search.css | 271 + docs/generated-html/search/search.js | 814 ++ docs/generated-html/search/search_l.png | Bin 0 -> 604 bytes docs/generated-html/search/search_m.png | Bin 0 -> 158 bytes docs/generated-html/search/search_r.png | Bin 0 -> 612 bytes docs/generated-html/search/searchdata.js | 45 + docs/generated-html/search/typedefs_0.html | 30 + docs/generated-html/search/typedefs_0.js | 7 + docs/generated-html/search/typedefs_1.html | 30 + docs/generated-html/search/typedefs_1.js | 6 + docs/generated-html/search/typedefs_10.html | 30 + docs/generated-html/search/typedefs_10.js | 4 + docs/generated-html/search/typedefs_11.html | 30 + docs/generated-html/search/typedefs_11.js | 4 + docs/generated-html/search/typedefs_2.html | 30 + docs/generated-html/search/typedefs_2.js | 6 + docs/generated-html/search/typedefs_3.html | 30 + docs/generated-html/search/typedefs_3.js | 5 + docs/generated-html/search/typedefs_4.html | 30 + docs/generated-html/search/typedefs_4.js | 6 + docs/generated-html/search/typedefs_5.html | 30 + 
docs/generated-html/search/typedefs_5.js | 14 + docs/generated-html/search/typedefs_6.html | 30 + docs/generated-html/search/typedefs_6.js | 23 + docs/generated-html/search/typedefs_7.html | 30 + docs/generated-html/search/typedefs_7.js | 11 + docs/generated-html/search/typedefs_8.html | 30 + docs/generated-html/search/typedefs_8.js | 4 + docs/generated-html/search/typedefs_9.html | 30 + docs/generated-html/search/typedefs_9.js | 6 + docs/generated-html/search/typedefs_a.html | 30 + docs/generated-html/search/typedefs_a.js | 4 + docs/generated-html/search/typedefs_b.html | 30 + docs/generated-html/search/typedefs_b.js | 6 + docs/generated-html/search/typedefs_c.html | 30 + docs/generated-html/search/typedefs_c.js | 6 + docs/generated-html/search/typedefs_d.html | 30 + docs/generated-html/search/typedefs_d.js | 31 + docs/generated-html/search/typedefs_e.html | 30 + docs/generated-html/search/typedefs_e.js | 21 + docs/generated-html/search/typedefs_f.html | 30 + docs/generated-html/search/typedefs_f.js | 5 + docs/generated-html/search/variables_0.html | 30 + docs/generated-html/search/variables_0.js | 5 + docs/generated-html/search/variables_1.html | 30 + docs/generated-html/search/variables_1.js | 5 + docs/generated-html/search/variables_10.html | 30 + docs/generated-html/search/variables_10.js | 5 + docs/generated-html/search/variables_2.html | 30 + docs/generated-html/search/variables_2.js | 5 + docs/generated-html/search/variables_3.html | 30 + docs/generated-html/search/variables_3.js | 7 + docs/generated-html/search/variables_4.html | 30 + docs/generated-html/search/variables_4.js | 4 + docs/generated-html/search/variables_5.html | 30 + docs/generated-html/search/variables_5.js | 7 + docs/generated-html/search/variables_6.html | 30 + docs/generated-html/search/variables_6.js | 6 + docs/generated-html/search/variables_7.html | 30 + docs/generated-html/search/variables_7.js | 12 + docs/generated-html/search/variables_8.html | 30 + 
docs/generated-html/search/variables_8.js | 64 + docs/generated-html/search/variables_9.html | 30 + docs/generated-html/search/variables_9.js | 9 + docs/generated-html/search/variables_a.html | 30 + docs/generated-html/search/variables_a.js | 5 + docs/generated-html/search/variables_b.html | 30 + docs/generated-html/search/variables_b.js | 4 + docs/generated-html/search/variables_c.html | 30 + docs/generated-html/search/variables_c.js | 9 + docs/generated-html/search/variables_d.html | 30 + docs/generated-html/search/variables_d.js | 5 + docs/generated-html/search/variables_e.html | 30 + docs/generated-html/search/variables_e.js | 19 + docs/generated-html/search/variables_f.html | 30 + docs/generated-html/search/variables_f.js | 8 + docs/generated-html/sgemm__traits_8h.html | 117 + .../sgemm__traits_8h_source.html | 103 + docs/generated-html/shape_8h.html | 154 + docs/generated-html/shape_8h_source.html | 120 + docs/generated-html/splitbar.png | Bin 0 -> 310 bytes .../structcutlass_1_1AlignedStruct.html | 101 + .../structcutlass_1_1AlignedStruct.png | Bin 0 -> 1116 bytes ...ass_1_1ComputeOffsetFromShape-members.html | 91 + ...ructcutlass_1_1ComputeOffsetFromShape.html | 165 + ...__00_01kSw___00_011_01_4_01_4-members.html | 91 + ...0_01kSh___00_01kSw___00_011_01_4_01_4.html | 166 + ..._01kSw___00_01kSc___01_4_01_4-members.html | 91 + ...kSh___00_01kSw___00_01kSc___01_4_01_4.html | 167 + ...s_1_1ComputeOffsetFromStrides-members.html | 91 + ...ctcutlass_1_1ComputeOffsetFromStrides.html | 165 + ..._00_01S__w___00_011_01_4_01_4-members.html | 91 + ...01S__h___00_01S__w___00_011_01_4_01_4.html | 166 + ...1S__w___00_01S__c___01_4_01_4-members.html | 91 + ...h___00_01S__w___00_01S__c___01_4_01_4.html | 167 + ...omputeThreadOffsetFromStrides-members.html | 91 + ...ass_1_1ComputeThreadOffsetFromStrides.html | 143 + ...011_03ed682791cf043da79a7cc93228a8c85.html | 91 + ...011_0e75281d7e02fa191f5d498e10e25dc1b.html | 132 + ...01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html | 132 + 
...01T__f2e6d84a53db391977c787a65ed62aca.html | 91 + ..._1_1ConstPredicateTileAdapter-members.html | 94 + ...tcutlass_1_1ConstPredicateTileAdapter.html | 231 + .../structcutlass_1_1Convert.html | 92 + ...3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html | 95 + ...3_01Ofca5985d18bcb54bc1f49355f3cee121.html | 265 + .../structcutlass_1_1Coord-members.html | 114 + .../structcutlass_1_1Coord.html | 869 ++ .../structcutlass_1_1Copy-members.html | 95 + .../generated-html/structcutlass_1_1Copy.html | 265 + .../structcutlass_1_1Extent-members.html | 91 + .../structcutlass_1_1Extent.html | 129 + ...or_3_01T_00_01Lanes_01_4_01_4-members.html | 91 + ...3_01Vector_3_01T_00_01Lanes_01_4_01_4.html | 129 + ..._00_01Lanes_01_4_01const_01_4-members.html | 91 + ...or_3_01T_00_01Lanes_01_4_01const_01_4.html | 129 + .../structcutlass_1_1Fragment-members.html | 96 + .../structcutlass_1_1Fragment.html | 280 + .../structcutlass_1_1Fragment.png | Bin 0 -> 1117 bytes ...lass_1_1FragmentConstIterator-members.html | 104 + ...tructcutlass_1_1FragmentConstIterator.html | 506 ++ ...ctcutlass_1_1FragmentIterator-members.html | 105 + .../structcutlass_1_1FragmentIterator.html | 562 ++ .../structcutlass_1_1FragmentLoad.html | 92 + ...__00_29bcae86cc02cb793583fe6b659e7a83.html | 92 + ...__00_9bf6f8f94e2cd7f3702b853d418a9863.html | 171 + ...lar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html | 92 + ...lar__a157bdca477e8efca5bc9cda0db6db8e.html | 171 + .../structcutlass_1_1FragmentStore.html | 92 + ...___0039852e55b713e99520c56b76ce64b290.html | 92 + ...___0087787c90510d0c4c07703b5a90c263de.html | 171 + ...alar_00c2299561c3ffbb17f8afc6add32eba.html | 171 + ...alar_dea9a5a5c980336e8c43a15909be3cdb.html | 92 + ...ructcutlass_1_1FragmentStream-members.html | 110 + .../structcutlass_1_1FragmentStream.html | 598 ++ ...s_1_1FragmentStream_1_1Params-members.html | 95 + ...ctcutlass_1_1FragmentStream_1_1Params.html | 230 + .../structcutlass_1_1GemmOperand-members.html | 95 + .../structcutlass_1_1GemmOperand.html | 129 + 
.../structcutlass_1_1Identity-members.html | 93 + .../structcutlass_1_1Identity.html | 126 + ...uctcutlass_1_1IteratorAdvance-members.html | 94 + .../structcutlass_1_1IteratorAdvance.html | 127 + ...ctcutlass_1_1IteratorFragment-members.html | 93 + .../structcutlass_1_1IteratorFragment.html | 125 + .../structcutlass_1_1Load-members.html | 92 + .../generated-html/structcutlass_1_1Load.html | 171 + ...mory___00_01true_00_0116_01_4-members.html | 92 + ..._00_01Memory___00_01true_00_0116_01_4.html | 171 + ...emory___00_01true_00_014_01_4-members.html | 92 + ...__00_01Memory___00_01true_00_014_01_4.html | 171 + ...emory___00_01true_00_018_01_4-members.html | 92 + ...__00_01Memory___00_01true_00_018_01_4.html | 171 + ...mory___00_01true_00_0116_01_4-members.html | 92 + ..._00_01Memory___00_01true_00_0116_01_4.html | 171 + ...structcutlass_1_1MatrixLayout-members.html | 93 + .../structcutlass_1_1MatrixLayout.html | 125 + .../structcutlass_1_1MemorySpace-members.html | 94 + .../structcutlass_1_1MemorySpace.html | 127 + ...tlass_1_1PredicateTileAdapter-members.html | 95 + ...structcutlass_1_1PredicateTileAdapter.html | 290 + ...uctcutlass_1_1PredicateVector-members.html | 108 + .../structcutlass_1_1PredicateVector.html | 658 ++ ...cateVector_1_1TrivialIterator-members.html | 96 + ...1_1PredicateVector_1_1TrivialIterator.html | 287 + .../structcutlass_1_1ReshapeTile-members.html | 91 + .../structcutlass_1_1ReshapeTile.html | 118 + ...1kAccessSize___00_01true_01_4-members.html | 91 + ...e___00_01kAccessSize___00_01true_01_4.html | 118 + .../structcutlass_1_1Shape-members.html | 94 + .../structcutlass_1_1Shape.html | 211 + .../structcutlass_1_1ShapeAdd-members.html | 91 + .../structcutlass_1_1ShapeAdd.html | 118 + .../structcutlass_1_1ShapeCount-members.html | 96 + .../structcutlass_1_1ShapeCount.html | 265 + .../structcutlass_1_1ShapeDiv-members.html | 91 + .../structcutlass_1_1ShapeDiv.html | 118 + .../structcutlass_1_1ShapeMax-members.html | 91 + 
.../structcutlass_1_1ShapeMax.html | 118 + .../structcutlass_1_1ShapeMin-members.html | 91 + .../structcutlass_1_1ShapeMin.html | 118 + .../structcutlass_1_1ShapeMul-members.html | 91 + .../structcutlass_1_1ShapeMul.html | 118 + .../structcutlass_1_1ShapeScale-members.html | 91 + .../structcutlass_1_1ShapeScale.html | 118 + ...structcutlass_1_1ShapeStrides-members.html | 91 + .../structcutlass_1_1ShapeStrides.html | 118 + .../structcutlass_1_1ShapeSub-members.html | 91 + .../structcutlass_1_1ShapeSub.html | 118 + .../structcutlass_1_1StorageType-members.html | 91 + .../structcutlass_1_1StorageType.html | 118 + ...ass_1_1StorageType_3_011_01_4-members.html | 91 + ...ructcutlass_1_1StorageType_3_011_01_4.html | 116 + ...ass_1_1StorageType_3_012_01_4-members.html | 91 + ...ructcutlass_1_1StorageType_3_012_01_4.html | 116 + ...ass_1_1StorageType_3_014_01_4-members.html | 91 + ...ructcutlass_1_1StorageType_3_014_01_4.html | 116 + .../structcutlass_1_1Store-members.html | 92 + .../structcutlass_1_1Store.html | 171 + ...mory___00_01true_00_0116_01_4-members.html | 92 + ..._00_01Memory___00_01true_00_0116_01_4.html | 171 + ...emory___00_01true_00_014_01_4-members.html | 92 + ...__00_01Memory___00_01true_00_014_01_4.html | 171 + ...emory___00_01true_00_018_01_4-members.html | 92 + ...__00_01Memory___00_01true_00_018_01_4.html | 171 + ...mory___00_01true_00_0116_01_4-members.html | 92 + ..._00_01Memory___00_01true_00_0116_01_4.html | 171 + ...ctcutlass_1_1TileIteratorBase-members.html | 114 + .../structcutlass_1_1TileIteratorBase.html | 695 ++ .../structcutlass_1_1TileIteratorBase.png | Bin 0 -> 2958 bytes ...leIteratorBase_1_1BaseStorage-members.html | 97 + ...ss_1_1TileIteratorBase_1_1BaseStorage.html | 283 + ...1_1TileIteratorBase_1_1Params-members.html | 100 + ...cutlass_1_1TileIteratorBase_1_1Params.html | 391 + ...tcutlass_1_1TileIteratorBase_1_1Params.png | Bin 0 -> 4162 bytes ...ctcutlass_1_1TileLoadIterator-members.html | 135 + 
.../structcutlass_1_1TileLoadIterator.html | 1253 +++ .../structcutlass_1_1TileLoadIterator.png | Bin 0 -> 2304 bytes ...1_1TileLoadIterator_1_1Params-members.html | 104 + ...cutlass_1_1TileLoadIterator_1_1Params.html | 350 + ...tcutlass_1_1TileLoadIterator_1_1Params.png | Bin 0 -> 3293 bytes ...tcutlass_1_1TileStoreIterator-members.html | 133 + .../structcutlass_1_1TileStoreIterator.html | 1210 +++ .../structcutlass_1_1TileStoreIterator.png | Bin 0 -> 2309 bytes ..._1TileStoreIterator_1_1Params-members.html | 104 + ...utlass_1_1TileStoreIterator_1_1Params.html | 350 + ...cutlass_1_1TileStoreIterator_1_1Params.png | Bin 0 -> 2388 bytes .../structcutlass_1_1TileTraits-members.html | 94 + .../structcutlass_1_1TileTraits.html | 179 + ..._1_1TileTraitsContiguousMajor-members.html | 96 + ...tcutlass_1_1TileTraitsContiguousMajor.html | 232 + ...cutlass_1_1TileTraitsStandard-members.html | 94 + .../structcutlass_1_1TileTraitsStandard.html | 208 + ...lass_1_1TileTraitsStrideMajor-members.html | 96 + ...tructcutlass_1_1TileTraitsStrideMajor.html | 232 + ...cutlass_1_1TileTraitsWarpRake-members.html | 99 + .../structcutlass_1_1TileTraitsWarpRake.html | 326 + ...raitsWarpRake_1_1ThreadOffset-members.html | 91 + ...1_1TileTraitsWarpRake_1_1ThreadOffset.html | 133 + ...tcutlass_1_1TiledThreadOffset-members.html | 91 + .../structcutlass_1_1TiledThreadOffset.html | 133 + ..._1TrivialPredicateTileAdapter-members.html | 92 + ...utlass_1_1TrivialPredicateTileAdapter.html | 183 + ...structcutlass_1_1VectorTraits-members.html | 94 + .../structcutlass_1_1VectorTraits.html | 200 + ...or_3_01T_00_01Lanes_01_4_01_4-members.html | 94 + ...3_01Vector_3_01T_00_01Lanes_01_4_01_4.html | 200 + ..._00_01Lanes_01_4_01const_01_4-members.html | 94 + ...or_3_01T_00_01Lanes_01_4_01const_01_4.html | 200 + .../structcutlass_1_1Vectorize-members.html | 91 + .../structcutlass_1_1Vectorize.html | 118 + ...ize_3_01Element___00_011_01_4-members.html | 91 + ..._1Vectorize_3_01Element___00_011_01_4.html | 118 
+ ...ructcutlass_1_1divide__assert-members.html | 91 + .../structcutlass_1_1divide__assert.html | 127 + ..._1_1gemm_1_1ClearAccumulators-members.html | 92 + ...tcutlass_1_1gemm_1_1ClearAccumulators.html | 173 + ...1_1ClearAccumulators_1_1SharedStorage.html | 95 + ...utlass_1_1gemm_1_1DgemmConfig-members.html | 115 + .../structcutlass_1_1gemm_1_1DgemmConfig.html | 177 + .../structcutlass_1_1gemm_1_1DgemmConfig.png | Bin 0 -> 2957 bytes ...utlass_1_1gemm_1_1DgemmTraits-members.html | 111 + .../structcutlass_1_1gemm_1_1DgemmTraits.html | 173 + .../structcutlass_1_1gemm_1_1DgemmTraits.png | Bin 0 -> 8483 bytes ..._1gemm_1_1FragmentMultiplyAdd-members.html | 97 + ...utlass_1_1gemm_1_1FragmentMultiplyAdd.html | 318 + ...mentMultiplyAdd_3_01half_01_4-members.html | 97 + ..._1_1FragmentMultiplyAdd_3_01half_01_4.html | 304 + ...structcutlass_1_1gemm_1_1Gemm-members.html | 106 + .../structcutlass_1_1gemm_1_1Gemm.html | 522 ++ ...cutlass_1_1gemm_1_1GemmConfig-members.html | 115 + .../structcutlass_1_1gemm_1_1GemmConfig.html | 693 ++ ...ctcutlass_1_1gemm_1_1GemmDesc-members.html | 103 + .../structcutlass_1_1gemm_1_1GemmDesc.html | 344 + ...tlass_1_1gemm_1_1GemmEpilogue-members.html | 118 + ...structcutlass_1_1gemm_1_1GemmEpilogue.html | 755 ++ .../structcutlass_1_1gemm_1_1GemmEpilogue.png | Bin 0 -> 1600 bytes ...1_1gemm_1_1GemmEpilogueTraits-members.html | 106 + ...cutlass_1_1gemm_1_1GemmEpilogueTraits.html | 418 + ...m_1_1GemmEpilogueTraitsHelper-members.html | 106 + ...s_1_1gemm_1_1GemmEpilogueTraitsHelper.html | 403 + ...1GemmEpilogueTraits_1_1Params-members.html | 98 + ...1gemm_1_1GemmEpilogueTraits_1_1Params.html | 274 + ...ilogueTraits_1_1SharedStorage-members.html | 91 + ..._1GemmEpilogueTraits_1_1SharedStorage.html | 121 + ...1_1gemm_1_1GemmFragmentStream-members.html | 116 + ...cutlass_1_1gemm_1_1GemmFragmentStream.html | 652 ++ ...tcutlass_1_1gemm_1_1GemmFragmentStream.png | Bin 0 -> 3703 bytes ...m_1_1GemmFragmentStreamTraits-members.html | 103 + 
...s_1_1gemm_1_1GemmFragmentStreamTraits.html | 387 + ...1GemmFragmentStream_1_1Params-members.html | 91 + ...1gemm_1_1GemmFragmentStream_1_1Params.html | 161 + ..._1gemm_1_1GemmFragmentStream_1_1Params.png | Bin 0 -> 950 bytes ...1gemm_1_1GemmGlobalIteratorAb-members.html | 142 + ...tlass_1_1gemm_1_1GemmGlobalIteratorAb.html | 912 ++ ...utlass_1_1gemm_1_1GemmGlobalIteratorAb.png | Bin 0 -> 3492 bytes ...emmGlobalIteratorAb_1_1Params-members.html | 105 + ...emm_1_1GemmGlobalIteratorAb_1_1Params.html | 193 + ...gemm_1_1GemmGlobalIteratorAb_1_1Params.png | Bin 0 -> 3308 bytes ...GemmGlobalIteratorAb_1_1SharedStorage.html | 95 + ...1gemm_1_1GemmGlobalIteratorCd-members.html | 131 + ...tlass_1_1gemm_1_1GemmGlobalIteratorCd.html | 783 ++ ...utlass_1_1gemm_1_1GemmGlobalIteratorCd.png | Bin 0 -> 1734 bytes ...1_1GemmGlobalIteratorCdTraits-members.html | 105 + ...1_1gemm_1_1GemmGlobalIteratorCdTraits.html | 280 + ..._1_1gemm_1_1GemmGlobalIteratorCdTraits.png | Bin 0 -> 2231 bytes ...ratorCdTraits_1_1ThreadOffset-members.html | 91 + ...lobalIteratorCdTraits_1_1ThreadOffset.html | 132 + ...emmGlobalIteratorCd_1_1Params-members.html | 98 + ...emm_1_1GemmGlobalIteratorCd_1_1Params.html | 298 + ...GemmGlobalIteratorCd_1_1SharedStorage.html | 95 + ...emm_1_1GemmGlobalTileCdTraits-members.html | 105 + ...ass_1_1gemm_1_1GemmGlobalTileCdTraits.html | 298 + ...lass_1_1gemm_1_1GemmGlobalTileCdTraits.png | Bin 0 -> 2215 bytes ...lTileCdTraits_1_1ThreadOffset-members.html | 91 + ...emmGlobalTileCdTraits_1_1ThreadOffset.html | 132 + ...1gemm_1_1GemmGlobalTileTraits-members.html | 103 + ...tlass_1_1gemm_1_1GemmGlobalTileTraits.html | 400 + ...utlass_1_1gemm_1_1GemmGlobalTileTraits.png | Bin 0 -> 2856 bytes ...balTileTraits_1_1ThreadOffset-members.html | 91 + ...1GemmGlobalTileTraits_1_1ThreadOffset.html | 132 + ...emm_1_1GemmMultiplicandTraits-members.html | 95 + ...ass_1_1gemm_1_1GemmMultiplicandTraits.html | 228 + ..._1gemm_1_1GemmOperandTraitsAb-members.html | 91 + 
...utlass_1_1gemm_1_1GemmOperandTraitsAb.html | 129 + ...GemmSharedLoadIteratorATraits-members.html | 106 + ...gemm_1_1GemmSharedLoadIteratorATraits.html | 463 + ...eratorATraits_1_1ThreadOffset-members.html | 91 + ...edLoadIteratorATraits_1_1ThreadOffset.html | 132 + ...GemmSharedLoadIteratorBTraits-members.html | 106 + ...gemm_1_1GemmSharedLoadIteratorBTraits.html | 463 + ...eratorBTraits_1_1ThreadOffset-members.html | 91 + ...edLoadIteratorBTraits_1_1ThreadOffset.html | 132 + ...GemmSharedLoadIteratorDTraits-members.html | 107 + ...gemm_1_1GemmSharedLoadIteratorDTraits.html | 504 ++ ...eratorDTraits_1_1ThreadOffset-members.html | 91 + ...edLoadIteratorDTraits_1_1ThreadOffset.html | 132 + ..._1_1GemmSharedLoadTileATraits-members.html | 107 + ..._1_1gemm_1_1GemmSharedLoadTileATraits.html | 482 + ...adTileATraits_1_1ThreadOffset-members.html | 91 + ...SharedLoadTileATraits_1_1ThreadOffset.html | 132 + ..._1_1GemmSharedLoadTileBTraits-members.html | 107 + ..._1_1gemm_1_1GemmSharedLoadTileBTraits.html | 482 + ...adTileBTraits_1_1ThreadOffset-members.html | 91 + ...SharedLoadTileBTraits_1_1ThreadOffset.html | 132 + ..._1_1GemmSharedLoadTileDTraits-members.html | 108 + ..._1_1gemm_1_1GemmSharedLoadTileDTraits.html | 523 ++ ...adTileDTraits_1_1ThreadOffset-members.html | 91 + ...SharedLoadTileDTraits_1_1ThreadOffset.html | 132 + ...mmSharedStoreIteratorAbTraits-members.html | 100 + ...mm_1_1GemmSharedStoreIteratorAbTraits.html | 325 + ...ratorAbTraits_1_1ThreadOffset-members.html | 91 + ...StoreIteratorAbTraits_1_1ThreadOffset.html | 129 + ...emmSharedStoreIteratorDTraits-members.html | 104 + ...emm_1_1GemmSharedStoreIteratorDTraits.html | 426 + ...eratorDTraits_1_1ThreadOffset-members.html | 91 + ...dStoreIteratorDTraits_1_1ThreadOffset.html | 132 + ..._1GemmSharedStoreTileAbTraits-members.html | 101 + ..._1gemm_1_1GemmSharedStoreTileAbTraits.html | 344 + ...eTileAbTraits_1_1ThreadOffset-members.html | 91 + ...aredStoreTileAbTraits_1_1ThreadOffset.html | 129 + 
...1_1GemmSharedStoreTileDTraits-members.html | 105 + ...1_1gemm_1_1GemmSharedStoreTileDTraits.html | 445 + ...reTileDTraits_1_1ThreadOffset-members.html | 91 + ...haredStoreTileDTraits_1_1ThreadOffset.html | 132 + ...StoreWithSkewIteratorAbTraits-members.html | 101 + ...mmSharedStoreWithSkewIteratorAbTraits.html | 344 + ...ratorAbTraits_1_1ThreadOffset-members.html | 91 + ...hSkewIteratorAbTraits_1_1ThreadOffset.html | 129 + ...aredStoreWithSkewTileAbTraits-members.html | 102 + ..._1GemmSharedStoreWithSkewTileAbTraits.html | 375 + ...wTileAbTraits_1_1ThreadOffset-members.html | 91 + ...eWithSkewTileAbTraits_1_1ThreadOffset.html | 129 + ...lass_1_1gemm_1_1GemmTileTraitsHelperA.html | 92 + ...nMajor_00_01GemmConfig___01_4-members.html | 96 + ..._1kColumnMajor_00_01GemmConfig___01_4.html | 236 + ...1_1kColumnMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1679 bytes ...wMajor_00_01GemmConfig___01_4-members.html | 97 + ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 263 + ...ut_1_1kRowMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1661 bytes ...lass_1_1gemm_1_1GemmTileTraitsHelperB.html | 92 + ...nMajor_00_01GemmConfig___01_4-members.html | 97 + ..._1kColumnMajor_00_01GemmConfig___01_4.html | 263 + ...1_1kColumnMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1689 bytes ...wMajor_00_01GemmConfig___01_4-members.html | 96 + ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 236 + ...ut_1_1kRowMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1637 bytes ...cutlass_1_1gemm_1_1GemmTraits-members.html | 111 + .../structcutlass_1_1gemm_1_1GemmTraits.html | 568 ++ ...emmTraits_1_1GlobalLoadStream-members.html | 96 + ...emm_1_1GemmTraits_1_1GlobalLoadStream.html | 295 + ...aits_1_1MainLoopSharedStorage-members.html | 93 + ..._1GemmTraits_1_1MainLoopSharedStorage.html | 154 + ...1gemm_1_1GemmTraits_1_1Params-members.html | 99 + ...tlass_1_1gemm_1_1GemmTraits_1_1Params.html | 292 + ...emmTraits_1_1SharedLoadStream-members.html | 102 + ...emm_1_1GemmTraits_1_1SharedLoadStream.html | 418 + 
...ass_1_1gemm_1_1Gemm_1_1Params-members.html | 91 + ...ructcutlass_1_1gemm_1_1Gemm_1_1Params.html | 217 + ...tructcutlass_1_1gemm_1_1Gemm_1_1Params.png | Bin 0 -> 788 bytes .../structcutlass_1_1gemm_1_1GetExtent.html | 92 + ...perand_1_1kA_00_01Tile___01_4-members.html | 91 + ..._01GemmOperand_1_1kA_00_01Tile___01_4.html | 126 + ...perand_1_1kB_00_01Tile___01_4-members.html | 91 + ..._01GemmOperand_1_1kB_00_01Tile___01_4.html | 126 + ...s_1_1gemm_1_1GlobalLoadStream-members.html | 112 + ...ctcutlass_1_1gemm_1_1GlobalLoadStream.html | 253 + ...uctcutlass_1_1gemm_1_1GlobalLoadStream.png | Bin 0 -> 1505 bytes ...1gemm_1_1GlobalLoadStreamBase-members.html | 110 + ...tlass_1_1gemm_1_1GlobalLoadStreamBase.html | 602 ++ ...utlass_1_1gemm_1_1GlobalLoadStreamBase.png | Bin 0 -> 1507 bytes ...lobalLoadStreamBase_1_1Params-members.html | 93 + ...emm_1_1GlobalLoadStreamBase_1_1Params.html | 185 + ..._1_1gemm_1_1GlobalStoreStream-members.html | 108 + ...tcutlass_1_1gemm_1_1GlobalStoreStream.html | 562 ++ ..._1GlobalStoreStream_1_1Params-members.html | 92 + ..._1gemm_1_1GlobalStoreStream_1_1Params.html | 168 + ...utlass_1_1gemm_1_1HgemmConfig-members.html | 115 + .../structcutlass_1_1gemm_1_1HgemmConfig.html | 177 + .../structcutlass_1_1gemm_1_1HgemmConfig.png | Bin 0 -> 2911 bytes ...gemmCrosswiseGlobalTileTraits-members.html | 104 + ...emm_1_1HgemmCrosswiseGlobalTileTraits.html | 254 + ...gemm_1_1HgemmCrosswiseGlobalTileTraits.png | Bin 0 -> 2175 bytes ...balTileTraits_1_1ThreadOffset-members.html | 91 + ...swiseGlobalTileTraits_1_1ThreadOffset.html | 132 + ...tlass_1_1gemm_1_1HgemmSwizzle-members.html | 97 + ...structcutlass_1_1gemm_1_1HgemmSwizzle.html | 273 + ...ass_1_1gemm_1_1HgemmTileTraitsHelperA.html | 101 + ...lass_1_1gemm_1_1HgemmTileTraitsHelperA.png | Bin 0 -> 1417 bytes ...wMajor_00_01GemmConfig___01_4-members.html | 98 + ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 211 + ...ut_1_1kRowMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1654 bytes 
...ass_1_1gemm_1_1HgemmTileTraitsHelperB.html | 101 + ...lass_1_1gemm_1_1HgemmTileTraitsHelperB.png | Bin 0 -> 1416 bytes ...nMajor_00_01GemmConfig___01_4-members.html | 98 + ..._1kColumnMajor_00_01GemmConfig___01_4.html | 211 + ...1_1kColumnMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1683 bytes ...utlass_1_1gemm_1_1HgemmTraits-members.html | 111 + .../structcutlass_1_1gemm_1_1HgemmTraits.html | 172 + .../structcutlass_1_1gemm_1_1HgemmTraits.png | Bin 0 -> 3190 bytes ..._1_1gemm_1_1HgemmTraitsHelper-members.html | 109 + ...tcutlass_1_1gemm_1_1HgemmTraitsHelper.html | 460 + ...tcutlass_1_1gemm_1_1HgemmTransformerA.html | 92 + ...umnMajor_00_01Iterator___01_4-members.html | 91 + ..._1_1kColumnMajor_00_01Iterator___01_4.html | 118 + ...RowMajor_00_01Iterator___01_4-members.html | 91 + ...out_1_1kRowMajor_00_01Iterator___01_4.html | 118 + ...tcutlass_1_1gemm_1_1HgemmTransformerB.html | 92 + ...umnMajor_00_01Iterator___01_4-members.html | 91 + ..._1_1kColumnMajor_00_01Iterator___01_4.html | 118 + ...RowMajor_00_01Iterator___01_4-members.html | 91 + ...out_1_1kRowMajor_00_01Iterator___01_4.html | 118 + ...1gemm_1_1IdentityBlockSwizzle-members.html | 92 + ...tlass_1_1gemm_1_1IdentityBlockSwizzle.html | 157 + ...utlass_1_1gemm_1_1IgemmConfig-members.html | 115 + .../structcutlass_1_1gemm_1_1IgemmConfig.html | 177 + .../structcutlass_1_1gemm_1_1IgemmConfig.png | Bin 0 -> 2603 bytes ...1AccumulatorsPerThread___01_4-members.html | 115 + ...__t_00_01AccumulatorsPerThread___01_4.html | 177 + ...8__t_00_01AccumulatorsPerThread___01_4.png | Bin 0 -> 2426 bytes ...emmContiguousGlobalTileTraits-members.html | 104 + ...mm_1_1IgemmContiguousGlobalTileTraits.html | 254 + ...emm_1_1IgemmContiguousGlobalTileTraits.png | Bin 0 -> 2171 bytes ...balTileTraits_1_1ThreadOffset-members.html | 91 + ...guousGlobalTileTraits_1_1ThreadOffset.html | 132 + ...lass_1_1gemm_1_1IgemmEpilogue-members.html | 120 + ...tructcutlass_1_1gemm_1_1IgemmEpilogue.html | 275 + 
...structcutlass_1_1gemm_1_1IgemmEpilogue.png | Bin 0 -> 1167 bytes ..._1gemm_1_1IgemmEpilogueScalar-members.html | 91 + ...utlass_1_1gemm_1_1IgemmEpilogueScalar.html | 118 + ...mmEpilogueScalar_3_01int_01_4-members.html | 91 + ...m_1_1IgemmEpilogueScalar_3_01int_01_4.html | 116 + ..._1gemm_1_1IgemmEpilogueTraits-members.html | 107 + ...utlass_1_1gemm_1_1IgemmEpilogueTraits.html | 187 + ...cutlass_1_1gemm_1_1IgemmEpilogueTraits.png | Bin 0 -> 3581 bytes ..._1_1IgemmEpilogueTraitsHelper-members.html | 111 + ..._1_1gemm_1_1IgemmEpilogueTraitsHelper.html | 518 ++ ...s_1_1gemm_1_1IgemmEpilogueTraitsHelper.png | Bin 0 -> 1613 bytes ...ilogueTraits___00_01true_01_4-members.html | 120 + ...01GemmEpilogueTraits___00_01true_01_4.html | 275 + ..._01GemmEpilogueTraits___00_01true_01_4.png | Bin 0 -> 1176 bytes ..._1_1IgemmFloatToInt8Converter-members.html | 95 + ..._1_1gemm_1_1IgemmFloatToInt8Converter.html | 265 + ...1_1IgemmGlobalLoadTransformer-members.html | 91 + ...1_1gemm_1_1IgemmGlobalLoadTransformer.html | 118 + ...ements___01_4_00_01float_01_4-members.html | 91 + ...00_01kElements___01_4_00_01float_01_4.html | 118 + ..._1IgemmGlobalStoreTransformer-members.html | 91 + ..._1gemm_1_1IgemmGlobalStoreTransformer.html | 118 + ..._t_00_01kElements___01_4_01_4-members.html | 91 + ..._01int8__t_00_01kElements___01_4_01_4.html | 118 + ..._1_1IgemmInt8ToFloatConverter-members.html | 95 + ..._1_1gemm_1_1IgemmInt8ToFloatConverter.html | 265 + ..._1IgemmSharedStoreTransformer-members.html | 91 + ..._1gemm_1_1IgemmSharedStoreTransformer.html | 118 + ...tlass_1_1gemm_1_1IgemmSwizzle-members.html | 97 + ...structcutlass_1_1gemm_1_1IgemmSwizzle.html | 273 + ...ass_1_1gemm_1_1IgemmTileTraitsHelperA.html | 101 + ...lass_1_1gemm_1_1IgemmTileTraitsHelperA.png | Bin 0 -> 1401 bytes ...nMajor_00_01GemmConfig___01_4-members.html | 98 + ..._1kColumnMajor_00_01GemmConfig___01_4.html | 218 + ...1_1kColumnMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1674 bytes 
...ass_1_1gemm_1_1IgemmTileTraitsHelperB.html | 101 + ...lass_1_1gemm_1_1IgemmTileTraitsHelperB.png | Bin 0 -> 1400 bytes ...wMajor_00_01GemmConfig___01_4-members.html | 98 + ...t_1_1kRowMajor_00_01GemmConfig___01_4.html | 218 + ...ut_1_1kRowMajor_00_01GemmConfig___01_4.png | Bin 0 -> 1634 bytes ...utlass_1_1gemm_1_1IgemmTraits-members.html | 111 + .../structcutlass_1_1gemm_1_1IgemmTraits.html | 172 + .../structcutlass_1_1gemm_1_1IgemmTraits.png | Bin 0 -> 3096 bytes ..._1_1gemm_1_1IgemmTraitsHelper-members.html | 108 + ...tcutlass_1_1gemm_1_1IgemmTraitsHelper.html | 441 + ...tcutlass_1_1gemm_1_1IgemmTransformerA.html | 92 + ...umnMajor_00_01Iterator___01_4-members.html | 91 + ..._1_1kColumnMajor_00_01Iterator___01_4.html | 118 + ...RowMajor_00_01Iterator___01_4-members.html | 91 + ...out_1_1kRowMajor_00_01Iterator___01_4.html | 118 + ...tcutlass_1_1gemm_1_1IgemmTransformerB.html | 92 + ...umnMajor_00_01Iterator___01_4-members.html | 91 + ..._1_1kColumnMajor_00_01Iterator___01_4.html | 118 + ...RowMajor_00_01Iterator___01_4-members.html | 91 + ...out_1_1kRowMajor_00_01Iterator___01_4.html | 118 + ...lass_1_1gemm_1_1LinearScaling-members.html | 97 + ...tructcutlass_1_1gemm_1_1LinearScaling.html | 319 + ...mm_1_1LinearScaling_1_1Params-members.html | 93 + ...ss_1_1gemm_1_1LinearScaling_1_1Params.html | 179 + ...ructcutlass_1_1gemm_1_1ProjectOperand.html | 97 + ...rand_1_1kA_00_01Kstrided_01_4-members.html | 91 + ...1GemmOperand_1_1kA_00_01Kstrided_01_4.html | 133 + ...rand_1_1kB_00_01Kstrided_01_4-members.html | 91 + ...1GemmOperand_1_1kB_00_01Kstrided_01_4.html | 133 + ...mOperand_1_1kC_00_01true_01_4-members.html | 91 + ..._3_01GemmOperand_1_1kC_00_01true_01_4.html | 131 + ...mOperand_1_1kD_00_01true_01_4-members.html | 91 + ..._3_01GemmOperand_1_1kD_00_01true_01_4.html | 131 + ...ass_1_1gemm_1_1ReshapeThreads-members.html | 91 + ...ructcutlass_1_1gemm_1_1ReshapeThreads.html | 118 + ...00_01Threads___00_01true_01_4-members.html | 91 + 
...1Tile___00_01Threads___00_01true_01_4.html | 118 + ...utlass_1_1gemm_1_1SgemmConfig-members.html | 115 + .../structcutlass_1_1gemm_1_1SgemmConfig.html | 177 + .../structcutlass_1_1gemm_1_1SgemmConfig.png | Bin 0 -> 2930 bytes ...utlass_1_1gemm_1_1SgemmTraits-members.html | 111 + .../structcutlass_1_1gemm_1_1SgemmTraits.html | 173 + .../structcutlass_1_1gemm_1_1SgemmTraits.png | Bin 0 -> 8487 bytes ...s_1_1gemm_1_1SharedLoadStream-members.html | 105 + ...ctcutlass_1_1gemm_1_1SharedLoadStream.html | 526 ++ ...1_1SharedLoadStream_1_1Params-members.html | 92 + ...1_1gemm_1_1SharedLoadStream_1_1Params.html | 157 + ..._1_1gemm_1_1SharedStoreStream-members.html | 102 + ...tcutlass_1_1gemm_1_1SharedStoreStream.html | 405 + ..._1SharedStoreStream_1_1Params-members.html | 92 + ..._1gemm_1_1SharedStoreStream_1_1Params.html | 157 + ...1SimplifiedGemmEpilogueTraits-members.html | 106 + ...1gemm_1_1SimplifiedGemmEpilogueTraits.html | 155 + ..._1gemm_1_1SimplifiedGemmEpilogueTraits.png | Bin 0 -> 3650 bytes ...1gemm_1_1SimplifiedGemmTraits-members.html | 111 + ...tlass_1_1gemm_1_1SimplifiedGemmTraits.html | 172 + ...utlass_1_1gemm_1_1SimplifiedGemmTraits.png | Bin 0 -> 3188 bytes ...1_1SimplifiedGemmTraitsHelper-members.html | 102 + ...1_1gemm_1_1SimplifiedGemmTraitsHelper.html | 328 + ..._1_1gemm_1_1ThreadMultiplyAdd-members.html | 102 + ...tcutlass_1_1gemm_1_1ThreadMultiplyAdd.html | 382 + ...__00_0179827d5e1abec446b31df6ae50a9c4.html | 102 + ...___00_01half_00_01half_00_01half_01_4.html | 383 + ...__00_ea75a025471611dd709d5f2a07d1bc06.html | 102 + ...__00_f5353db950bbf0023472029cac4814b6.html | 382 + ...m_1_1WmmaGemmGlobalIteratorCd-members.html | 131 + ...s_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html | 820 ++ ...ss_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png | Bin 0 -> 1763 bytes ...mmaGemmGlobalIteratorCdTraits-members.html | 104 + ...emm_1_1WmmaGemmGlobalIteratorCdTraits.html | 197 + ...gemm_1_1WmmaGemmGlobalIteratorCdTraits.png | Bin 0 -> 2187 bytes 
...ratorCdTraits_1_1ThreadOffset-members.html | 91 + ...lobalIteratorCdTraits_1_1ThreadOffset.html | 132 + ...emmGlobalIteratorCd_1_1Params-members.html | 98 + ...1_1WmmaGemmGlobalIteratorCd_1_1Params.html | 298 + ...GemmGlobalIteratorCd_1_1SharedStorage.html | 95 + .../structcutlass_1_1is__pow2-members.html | 95 + .../structcutlass_1_1is__pow2.html | 125 + .../structcutlass_1_1is__pow2.png | Bin 0 -> 1061 bytes .../structcutlass_1_1log2__down-members.html | 91 + .../structcutlass_1_1log2__down.html | 128 + ..._3_01N_00_011_00_01Count_01_4-members.html | 91 + ...g2__down_3_01N_00_011_00_01Count_01_4.html | 122 + .../structcutlass_1_1log2__up-members.html | 91 + .../structcutlass_1_1log2__up.html | 128 + ..._3_01N_00_011_00_01Count_01_4-members.html | 91 + ...log2__up_3_01N_00_011_00_01Count_01_4.html | 122 + ...cutlass_1_1platform_1_1aligned__chunk.html | 92 + ...1platform_1_1aligned__storage-members.html | 91 + ...tlass_1_1platform_1_1aligned__storage.html | 121 + ..._1_1platform_1_1alignment__of-members.html | 91 + ...tcutlass_1_1platform_1_1alignment__of.html | 142 + ...ctcutlass_1_1platform_1_1alignment__of.png | Bin 0 -> 1846 bytes ...tform_1_1alignment__of_1_1pad-members.html | 92 + ...s_1_1platform_1_1alignment__of_1_1pad.html | 136 + ..._of_3_01const_01value__t_01_4-members.html | 91 + ...ignment__of_3_01const_01value__t_01_4.html | 111 + ...lignment__of_3_01const_01value__t_01_4.png | Bin 0 -> 1078 bytes ...st_01volatile_01value__t_01_4-members.html | 91 + ..._3_01const_01volatile_01value__t_01_4.html | 111 + ...f_3_01const_01volatile_01value__t_01_4.png | Bin 0 -> 1141 bytes ...lignment__of_3_01double2_01_4-members.html | 91 + ...orm_1_1alignment__of_3_01double2_01_4.html | 120 + ...lignment__of_3_01double4_01_4-members.html | 91 + ...orm_1_1alignment__of_3_01double4_01_4.html | 120 + ...alignment__of_3_01float4_01_4-members.html | 91 + ...form_1_1alignment__of_3_01float4_01_4.html | 120 + ..._1alignment__of_3_01int4_01_4-members.html | 91 + 
...atform_1_1alignment__of_3_01int4_01_4.html | 120 + ...1alignment__of_3_01long4_01_4-members.html | 91 + ...tform_1_1alignment__of_3_01long4_01_4.html | 120 + ...gnment__of_3_01longlong2_01_4-members.html | 91 + ...m_1_1alignment__of_3_01longlong2_01_4.html | 120 + ...gnment__of_3_01longlong4_01_4-members.html | 91 + ...m_1_1alignment__of_3_01longlong4_01_4.html | 120 + ...1alignment__of_3_01uint4_01_4-members.html | 91 + ...tform_1_1alignment__of_3_01uint4_01_4.html | 120 + ...alignment__of_3_01ulong4_01_4-members.html | 91 + ...form_1_1alignment__of_3_01ulong4_01_4.html | 120 + ...nment__of_3_01ulonglong2_01_4-members.html | 91 + ..._1_1alignment__of_3_01ulonglong2_01_4.html | 120 + ...nment__of_3_01ulonglong4_01_4-members.html | 91 + ..._1_1alignment__of_3_01ulonglong4_01_4.html | 120 + ..._3_01volatile_01value__t_01_4-members.html | 91 + ...ment__of_3_01volatile_01value__t_01_4.html | 111 + ...nment__of_3_01volatile_01value__t_01_4.png | Bin 0 -> 1103 bytes ...1_1platform_1_1bool__constant-members.html | 95 + ...cutlass_1_1platform_1_1bool__constant.html | 123 + ...tcutlass_1_1platform_1_1bool__constant.png | Bin 0 -> 954 bytes ...ss_1_1platform_1_1conditional-members.html | 91 + ...uctcutlass_1_1platform_1_1conditional.html | 121 + ..._3_01false_00_01T_00_01F_01_4-members.html | 91 + ...ditional_3_01false_00_01T_00_01F_01_4.html | 121 + ..._1platform_1_1default__delete-members.html | 91 + ...utlass_1_1platform_1_1default__delete.html | 133 + ..._1_1default__delete_3_01T[]_4-members.html | 91 + ...platform_1_1default__delete_3_01T[]_4.html | 133 + ...ass_1_1platform_1_1enable__if-members.html | 91 + ...ructcutlass_1_1platform_1_1enable__if.html | 121 + ...m_1_1enable__if_3_01false_00_01T_01_4.html | 95 + ...utlass_1_1platform_1_1greater-members.html | 91 + .../structcutlass_1_1platform_1_1greater.html | 143 + ...latform_1_1integral__constant-members.html | 95 + ...ass_1_1platform_1_1integral__constant.html | 261 + ...lass_1_1platform_1_1integral__constant.png 
| Bin 0 -> 14622 bytes ...1_1platform_1_1is__arithmetic-members.html | 95 + ...cutlass_1_1platform_1_1is__arithmetic.html | 123 + ...tcutlass_1_1platform_1_1is__arithmetic.png | Bin 0 -> 1423 bytes ...s_1_1platform_1_1is__base__of-members.html | 95 + ...ctcutlass_1_1platform_1_1is__base__of.html | 123 + ...uctcutlass_1_1platform_1_1is__base__of.png | Bin 0 -> 2206 bytes ...tform_1_1is__base__of__helper-members.html | 95 + ...s_1_1platform_1_1is__base__of__helper.html | 264 + ...is__base__of__helper_1_1dummy-members.html | 92 + ...form_1_1is__base__of__helper_1_1dummy.html | 146 + ...atform_1_1is__floating__point-members.html | 95 + ...ss_1_1platform_1_1is__floating__point.html | 123 + ...ass_1_1platform_1_1is__floating__point.png | Bin 0 -> 1768 bytes ..._1platform_1_1is__fundamental-members.html | 95 + ...utlass_1_1platform_1_1is__fundamental.html | 123 + ...cutlass_1_1platform_1_1is__fundamental.png | Bin 0 -> 1831 bytes ...s_1_1platform_1_1is__integral-members.html | 95 + ...ctcutlass_1_1platform_1_1is__integral.html | 126 + ...uctcutlass_1_1platform_1_1is__integral.png | Bin 0 -> 2524 bytes ...1_1is__integral_3_01char_01_4-members.html | 95 + ...latform_1_1is__integral_3_01char_01_4.html | 120 + ...platform_1_1is__integral_3_01char_01_4.png | Bin 0 -> 1029 bytes ...__integral_3_01const_01T_01_4-members.html | 95 + ...rm_1_1is__integral_3_01const_01T_01_4.html | 121 + ...orm_1_1is__integral_3_01const_01T_01_4.png | Bin 0 -> 1504 bytes ...3_01const_01volatile_01T_01_4-members.html | 95 + ...ntegral_3_01const_01volatile_01T_01_4.html | 121 + ...integral_3_01const_01volatile_01T_01_4.png | Bin 0 -> 1547 bytes ..._1_1is__integral_3_01int_01_4-members.html | 95 + ...platform_1_1is__integral_3_01int_01_4.html | 120 + ...1platform_1_1is__integral_3_01int_01_4.png | Bin 0 -> 1013 bytes ...1_1is__integral_3_01long_01_4-members.html | 95 + ...latform_1_1is__integral_3_01long_01_4.html | 120 + ...platform_1_1is__integral_3_01long_01_4.png | Bin 0 -> 1017 bytes 
...integral_3_01long_01long_01_4-members.html | 95 + ..._1_1is__integral_3_01long_01long_01_4.html | 120 + ...m_1_1is__integral_3_01long_01long_01_4.png | Bin 0 -> 1037 bytes ..._1is__integral_3_01short_01_4-members.html | 95 + ...atform_1_1is__integral_3_01short_01_4.html | 120 + ...latform_1_1is__integral_3_01short_01_4.png | Bin 0 -> 1024 bytes ...tegral_3_01signed_01char_01_4-members.html | 95 + ..._1is__integral_3_01signed_01char_01_4.html | 120 + ...1_1is__integral_3_01signed_01char_01_4.png | Bin 0 -> 1078 bytes ...gral_3_01unsigned_01char_01_4-members.html | 95 + ...is__integral_3_01unsigned_01char_01_4.html | 120 + ...1is__integral_3_01unsigned_01char_01_4.png | Bin 0 -> 1084 bytes ...egral_3_01unsigned_01int_01_4-members.html | 95 + ...1is__integral_3_01unsigned_01int_01_4.html | 120 + ..._1is__integral_3_01unsigned_01int_01_4.png | Bin 0 -> 1073 bytes ...gral_3_01unsigned_01long_01_4-members.html | 95 + ...is__integral_3_01unsigned_01long_01_4.html | 120 + ...1is__integral_3_01unsigned_01long_01_4.png | Bin 0 -> 1080 bytes ...01unsigned_01long_01long_01_4-members.html | 95 + ...egral_3_01unsigned_01long_01long_01_4.html | 120 + ...tegral_3_01unsigned_01long_01long_01_4.png | Bin 0 -> 1099 bytes ...ral_3_01unsigned_01short_01_4-members.html | 95 + ...s__integral_3_01unsigned_01short_01_4.html | 120 + ...is__integral_3_01unsigned_01short_01_4.png | Bin 0 -> 1095 bytes ...ntegral_3_01volatile_01T_01_4-members.html | 95 + ...1_1is__integral_3_01volatile_01T_01_4.html | 121 + ..._1_1is__integral_3_01volatile_01T_01_4.png | Bin 0 -> 1515 bytes ...ss_1_1platform_1_1is__pointer-members.html | 95 + ...uctcutlass_1_1platform_1_1is__pointer.html | 124 + ...ructcutlass_1_1platform_1_1is__pointer.png | Bin 0 -> 1686 bytes ...atform_1_1is__pointer__helper-members.html | 95 + ...ss_1_1platform_1_1is__pointer__helper.html | 123 + ...ass_1_1platform_1_1is__pointer__helper.png | Bin 0 -> 1024 bytes ...inter__helper_3_01T_01_5_01_4-members.html | 95 + 
..._1is__pointer__helper_3_01T_01_5_01_4.html | 123 + ...1_1is__pointer__helper_3_01T_01_5_01_4.png | Bin 0 -> 1034 bytes ...tlass_1_1platform_1_1is__same-members.html | 95 + ...structcutlass_1_1platform_1_1is__same.html | 123 + .../structcutlass_1_1platform_1_1is__same.png | Bin 0 -> 1016 bytes ...1_1is__same_3_01A_00_01A_01_4-members.html | 95 + ...latform_1_1is__same_3_01A_00_01A_01_4.html | 123 + ...platform_1_1is__same_3_01A_00_01A_01_4.png | Bin 0 -> 1003 bytes ...rm_1_1is__trivially__copyable-members.html | 95 + ..._1platform_1_1is__trivially__copyable.html | 127 + ...1_1platform_1_1is__trivially__copyable.png | Bin 0 -> 1472 bytes ...tlass_1_1platform_1_1is__void-members.html | 95 + ...structcutlass_1_1platform_1_1is__void.html | 124 + .../structcutlass_1_1platform_1_1is__void.png | Bin 0 -> 1609 bytes ...s_1_1platform_1_1is__volatile-members.html | 95 + ...ctcutlass_1_1platform_1_1is__volatile.html | 123 + ...uctcutlass_1_1platform_1_1is__volatile.png | Bin 0 -> 989 bytes ...olatile_3_01volatile_01T_01_4-members.html | 95 + ...1_1is__volatile_3_01volatile_01T_01_4.html | 120 + ..._1_1is__volatile_3_01volatile_01T_01_4.png | Bin 0 -> 1004 bytes ...ctcutlass_1_1platform_1_1less-members.html | 91 + .../structcutlass_1_1platform_1_1less.html | 143 + ...ructcutlass_1_1platform_1_1nullptr__t.html | 95 + ...ctcutlass_1_1platform_1_1plus-members.html | 91 + .../structcutlass_1_1platform_1_1plus.html | 143 + ..._1_1platform_1_1remove__const-members.html | 91 + ...tcutlass_1_1platform_1_1remove__const.html | 121 + ...ove__const_3_01const_01T_01_4-members.html | 91 + ...m_1_1remove__const_3_01const_01T_01_4.html | 121 + ...ass_1_1platform_1_1remove__cv-members.html | 91 + ...ructcutlass_1_1platform_1_1remove__cv.html | 121 + ...1platform_1_1remove__volatile-members.html | 91 + ...tlass_1_1platform_1_1remove__volatile.html | 121 + ...olatile_3_01volatile_01T_01_4-members.html | 91 + ...emove__volatile_3_01volatile_01T_01_4.html | 121 + 
.../structcutlass_1_1sqrt__est-members.html | 91 + .../structcutlass_1_1sqrt__est.html | 127 + .../structnv__std_1_1aligned__chunk.html | 92 + ...ctnv__std_1_1aligned__storage-members.html | 91 + .../structnv__std_1_1aligned__storage.html | 121 + ...tructnv__std_1_1alignment__of-members.html | 91 + .../structnv__std_1_1alignment__of.html | 142 + .../structnv__std_1_1alignment__of.png | Bin 0 -> 1636 bytes ...__std_1_1alignment__of_1_1pad-members.html | 92 + ...structnv__std_1_1alignment__of_1_1pad.html | 136 + ..._of_3_01const_01value__t_01_4-members.html | 91 + ...ignment__of_3_01const_01value__t_01_4.html | 111 + ...lignment__of_3_01const_01value__t_01_4.png | Bin 0 -> 861 bytes ...st_01volatile_01value__t_01_4-members.html | 91 + ..._3_01const_01volatile_01value__t_01_4.html | 111 + ...f_3_01const_01volatile_01value__t_01_4.png | Bin 0 -> 988 bytes ...lignment__of_3_01double2_01_4-members.html | 91 + ...std_1_1alignment__of_3_01double2_01_4.html | 120 + ...lignment__of_3_01double4_01_4-members.html | 91 + ...std_1_1alignment__of_3_01double4_01_4.html | 120 + ...alignment__of_3_01float4_01_4-members.html | 91 + ..._std_1_1alignment__of_3_01float4_01_4.html | 120 + ..._1alignment__of_3_01int4_01_4-members.html | 91 + ...v__std_1_1alignment__of_3_01int4_01_4.html | 120 + ...1alignment__of_3_01long4_01_4-members.html | 91 + ...__std_1_1alignment__of_3_01long4_01_4.html | 120 + ...gnment__of_3_01longlong2_01_4-members.html | 91 + ...d_1_1alignment__of_3_01longlong2_01_4.html | 120 + ...gnment__of_3_01longlong4_01_4-members.html | 91 + ...d_1_1alignment__of_3_01longlong4_01_4.html | 120 + ...1alignment__of_3_01uint4_01_4-members.html | 91 + ...__std_1_1alignment__of_3_01uint4_01_4.html | 120 + ...alignment__of_3_01ulong4_01_4-members.html | 91 + ..._std_1_1alignment__of_3_01ulong4_01_4.html | 120 + ...nment__of_3_01ulonglong2_01_4-members.html | 91 + ..._1_1alignment__of_3_01ulonglong2_01_4.html | 120 + ...nment__of_3_01ulonglong4_01_4-members.html | 91 + 
..._1_1alignment__of_3_01ulonglong4_01_4.html | 120 + ..._3_01volatile_01value__t_01_4-members.html | 91 + ...ment__of_3_01volatile_01value__t_01_4.html | 111 + ...nment__of_3_01volatile_01value__t_01_4.png | Bin 0 -> 882 bytes ...ructnv__std_1_1bool__constant-members.html | 95 + .../structnv__std_1_1bool__constant.html | 123 + .../structnv__std_1_1bool__constant.png | Bin 0 -> 784 bytes .../structnv__std_1_1conditional-members.html | 91 + .../structnv__std_1_1conditional.html | 121 + ..._3_01false_00_01T_00_01F_01_4-members.html | 91 + ...ditional_3_01false_00_01T_00_01F_01_4.html | 121 + ...uctnv__std_1_1default__delete-members.html | 91 + .../structnv__std_1_1default__delete.html | 133 + ..._1_1default__delete_3_01T[]_4-members.html | 91 + ...tnv__std_1_1default__delete_3_01T[]_4.html | 133 + .../structnv__std_1_1enable__if-members.html | 91 + .../structnv__std_1_1enable__if.html | 121 + ...d_1_1enable__if_3_01false_00_01T_01_4.html | 95 + .../structnv__std_1_1greater-members.html | 91 + .../structnv__std_1_1greater.html | 143 + ...nv__std_1_1integral__constant-members.html | 95 + .../structnv__std_1_1integral__constant.html | 261 + .../structnv__std_1_1integral__constant.png | Bin 0 -> 12811 bytes ...ructnv__std_1_1is__arithmetic-members.html | 95 + .../structnv__std_1_1is__arithmetic.html | 123 + .../structnv__std_1_1is__arithmetic.png | Bin 0 -> 1222 bytes ...structnv__std_1_1is__base__of-members.html | 95 + .../structnv__std_1_1is__base__of.html | 123 + .../structnv__std_1_1is__base__of.png | Bin 0 -> 2076 bytes ...__std_1_1is__base__of__helper-members.html | 95 + ...structnv__std_1_1is__base__of__helper.html | 264 + ...is__base__of__helper_1_1dummy-members.html | 92 + ..._std_1_1is__base__of__helper_1_1dummy.html | 146 + ...v__std_1_1is__floating__point-members.html | 95 + .../structnv__std_1_1is__floating__point.html | 123 + .../structnv__std_1_1is__floating__point.png | Bin 0 -> 1588 bytes ...uctnv__std_1_1is__fundamental-members.html | 95 + 
.../structnv__std_1_1is__fundamental.html | 123 + .../structnv__std_1_1is__fundamental.png | Bin 0 -> 1662 bytes ...structnv__std_1_1is__integral-members.html | 95 + .../structnv__std_1_1is__integral.html | 126 + .../structnv__std_1_1is__integral.png | Bin 0 -> 2010 bytes ...1_1is__integral_3_01char_01_4-members.html | 95 + ...nv__std_1_1is__integral_3_01char_01_4.html | 120 + ...tnv__std_1_1is__integral_3_01char_01_4.png | Bin 0 -> 828 bytes ...__integral_3_01const_01T_01_4-members.html | 95 + ...td_1_1is__integral_3_01const_01T_01_4.html | 121 + ...std_1_1is__integral_3_01const_01T_01_4.png | Bin 0 -> 1170 bytes ...3_01const_01volatile_01T_01_4-members.html | 95 + ...ntegral_3_01const_01volatile_01T_01_4.html | 121 + ...integral_3_01const_01volatile_01T_01_4.png | Bin 0 -> 1219 bytes ..._1_1is__integral_3_01int_01_4-members.html | 95 + ...tnv__std_1_1is__integral_3_01int_01_4.html | 120 + ...ctnv__std_1_1is__integral_3_01int_01_4.png | Bin 0 -> 812 bytes ...1_1is__integral_3_01long_01_4-members.html | 95 + ...nv__std_1_1is__integral_3_01long_01_4.html | 120 + ...tnv__std_1_1is__integral_3_01long_01_4.png | Bin 0 -> 810 bytes ...integral_3_01long_01long_01_4-members.html | 95 + ..._1_1is__integral_3_01long_01long_01_4.html | 120 + ...d_1_1is__integral_3_01long_01long_01_4.png | Bin 0 -> 829 bytes ..._1is__integral_3_01short_01_4-members.html | 95 + ...v__std_1_1is__integral_3_01short_01_4.html | 120 + ...nv__std_1_1is__integral_3_01short_01_4.png | Bin 0 -> 824 bytes ...tegral_3_01signed_01char_01_4-members.html | 95 + ..._1is__integral_3_01signed_01char_01_4.html | 120 + ...1_1is__integral_3_01signed_01char_01_4.png | Bin 0 -> 878 bytes ...gral_3_01unsigned_01char_01_4-members.html | 95 + ...is__integral_3_01unsigned_01char_01_4.html | 120 + ...1is__integral_3_01unsigned_01char_01_4.png | Bin 0 -> 883 bytes ...egral_3_01unsigned_01int_01_4-members.html | 95 + ...1is__integral_3_01unsigned_01int_01_4.html | 120 + ..._1is__integral_3_01unsigned_01int_01_4.png | Bin 
0 -> 873 bytes ...gral_3_01unsigned_01long_01_4-members.html | 95 + ...is__integral_3_01unsigned_01long_01_4.html | 120 + ...1is__integral_3_01unsigned_01long_01_4.png | Bin 0 -> 876 bytes ...01unsigned_01long_01long_01_4-members.html | 95 + ...egral_3_01unsigned_01long_01long_01_4.html | 120 + ...tegral_3_01unsigned_01long_01long_01_4.png | Bin 0 -> 887 bytes ...ral_3_01unsigned_01short_01_4-members.html | 95 + ...s__integral_3_01unsigned_01short_01_4.html | 120 + ...is__integral_3_01unsigned_01short_01_4.png | Bin 0 -> 885 bytes ...ntegral_3_01volatile_01T_01_4-members.html | 95 + ...1_1is__integral_3_01volatile_01T_01_4.html | 121 + ..._1_1is__integral_3_01volatile_01T_01_4.png | Bin 0 -> 1188 bytes .../structnv__std_1_1is__pointer-members.html | 95 + .../structnv__std_1_1is__pointer.html | 124 + .../structnv__std_1_1is__pointer.png | Bin 0 -> 1479 bytes ...v__std_1_1is__pointer__helper-members.html | 95 + .../structnv__std_1_1is__pointer__helper.html | 123 + .../structnv__std_1_1is__pointer__helper.png | Bin 0 -> 827 bytes ...inter__helper_3_01T_01_5_01_4-members.html | 95 + ..._1is__pointer__helper_3_01T_01_5_01_4.html | 123 + ...1_1is__pointer__helper_3_01T_01_5_01_4.png | Bin 0 -> 834 bytes .../structnv__std_1_1is__same-members.html | 95 + .../structnv__std_1_1is__same.html | 123 + .../structnv__std_1_1is__same.png | Bin 0 -> 794 bytes ...1_1is__same_3_01A_00_01A_01_4-members.html | 95 + ...nv__std_1_1is__same_3_01A_00_01A_01_4.html | 123 + ...tnv__std_1_1is__same_3_01A_00_01A_01_4.png | Bin 0 -> 786 bytes ...td_1_1is__trivially__copyable-members.html | 95 + ...uctnv__std_1_1is__trivially__copyable.html | 127 + ...ructnv__std_1_1is__trivially__copyable.png | Bin 0 -> 1280 bytes .../structnv__std_1_1is__void-members.html | 95 + .../structnv__std_1_1is__void.html | 124 + .../structnv__std_1_1is__void.png | Bin 0 -> 1391 bytes ...structnv__std_1_1is__volatile-members.html | 95 + .../structnv__std_1_1is__volatile.html | 123 + 
.../structnv__std_1_1is__volatile.png | Bin 0 -> 786 bytes ...olatile_3_01volatile_01T_01_4-members.html | 95 + ...1_1is__volatile_3_01volatile_01T_01_4.html | 120 + ..._1_1is__volatile_3_01volatile_01T_01_4.png | Bin 0 -> 801 bytes .../structnv__std_1_1less-members.html | 91 + .../generated-html/structnv__std_1_1less.html | 143 + .../structnv__std_1_1nullptr__t.html | 95 + .../structnv__std_1_1plus-members.html | 91 + .../generated-html/structnv__std_1_1plus.html | 143 + ...tructnv__std_1_1remove__const-members.html | 91 + .../structnv__std_1_1remove__const.html | 121 + ...ove__const_3_01const_01T_01_4-members.html | 91 + ...d_1_1remove__const_3_01const_01T_01_4.html | 121 + .../structnv__std_1_1remove__cv-members.html | 91 + .../structnv__std_1_1remove__cv.html | 121 + ...ctnv__std_1_1remove__volatile-members.html | 91 + .../structnv__std_1_1remove__volatile.html | 121 + ...olatile_3_01volatile_01T_01_4-members.html | 91 + ...emove__volatile_3_01volatile_01T_01_4.html | 121 + docs/generated-html/sync_off.png | Bin 0 -> 855 bytes docs/generated-html/sync_on.png | Bin 0 -> 846 bytes docs/generated-html/tab_a.png | Bin 0 -> 146 bytes docs/generated-html/tab_b.png | Bin 0 -> 171 bytes docs/generated-html/tab_h.png | Bin 0 -> 178 bytes docs/generated-html/tab_s.png | Bin 0 -> 191 bytes docs/generated-html/tabs.css | 1 + docs/generated-html/tensor__ref_8h.html | 111 + .../generated-html/tensor__ref_8h_source.html | 117 + docs/generated-html/tensor__view_8h.html | 110 + .../tensor__view_8h_source.html | 127 + .../thread__multiply__add_8h.html | 110 + .../thread__multiply__add_8h_source.html | 105 + docs/generated-html/tile_8h.html | 109 + docs/generated-html/tile_8h_source.html | 93 + docs/generated-html/tile__iterator_8h.html | 135 + .../tile__iterator_8h_source.html | 246 + .../tile__traits__standard_8h.html | 121 + .../tile__traits__standard_8h_source.html | 132 + .../unioncutlass_1_1Vector-members.html | 98 + .../unioncutlass_1_1Vector.html | 314 + 
...r_3_01half_00_01kLanes___01_4-members.html | 98 + ...1_1Vector_3_01half_00_01kLanes___01_4.html | 314 + ...Traits_1_1StreamSharedStorage-members.html | 92 + ...EpilogueTraits_1_1StreamSharedStorage.html | 139 + ..._1GemmTraits_1_1SharedStorage-members.html | 92 + ..._1gemm_1_1GemmTraits_1_1SharedStorage.html | 139 + ...Traits_1_1StreamSharedStorage-members.html | 92 + ..._1_1GemmTraits_1_1StreamSharedStorage.html | 140 + ...adStreamBase_1_1SharedStorage-members.html | 92 + ...GlobalLoadStreamBase_1_1SharedStorage.html | 139 + docs/generated-html/vector_8h.html | 165 + docs/generated-html/vector_8h_source.html | 120 + .../wmma__gemm__epilogue__traits_8h.html | 93 + ...mma__gemm__epilogue__traits_8h_source.html | 104 + .../wmma__gemm__global__tile_8h.html | 117 + .../wmma__gemm__global__tile_8h_source.html | 142 + .../wmma__gemm__multiply__add_8h.html | 93 + .../wmma__gemm__multiply__add_8h_source.html | 92 + .../wmma__gemm__shared__tile_8h.html | 93 + .../wmma__gemm__shared__tile_8h_source.html | 103 + .../generated-html/wmma__gemm__traits_8h.html | 93 + .../wmma__gemm__traits_8h_source.html | 109 + docs/generated-html/wmma__matrix_8h.html | 92 + .../wmma__matrix_8h_source.html | 96 + media/cutlass-performance-plot.png | Bin 40183 -> 0 bytes media/fig-09-complete-hierarchy.png | Bin 38236 -> 0 bytes media/images/cutlass-performance-plot.png | Bin 0 -> 114552 bytes ...gemm-hierarchy-with-epilogue-no-labels.png | Bin 0 -> 223572 bytes media/images/gemm-hierarchy-with-epilogue.png | Bin 0 -> 256654 bytes tools/CMakeLists.txt | 26 + tools/external/googletest/.gitignore | 24 + tools/external/googletest/.travis.yml | 74 + tools/external/googletest/BUILD.bazel | 147 + tools/external/googletest/CMakeLists.txt | 33 + tools/external/googletest/README.md | 148 + tools/external/googletest/WORKSPACE | 1 + tools/external/googletest/appveyor.yml | 96 + .../googletest/ci/build-linux-bazel.sh | 35 + tools/external/googletest/ci/env-linux.sh | 41 + 
tools/external/googletest/ci/env-osx.sh | 40 + tools/external/googletest/ci/install-linux.sh | 49 + tools/external/googletest/ci/install-osx.sh | 39 + tools/external/googletest/ci/log-config.sh | 51 + tools/external/googletest/ci/travis.sh | 34 + tools/external/googletest/googlemock/CHANGES | 126 + .../googletest/googlemock/CMakeLists.txt | 225 + .../googletest/googlemock/CONTRIBUTORS | 40 + tools/external/googletest/googlemock/LICENSE | 28 + .../googletest/googlemock/Makefile.am | 224 + .../external/googletest/googlemock/README.md | 376 + .../googletest/googlemock/build-aux/.keep | 0 .../googletest/googlemock/cmake/gmock.pc.in | 9 + .../googlemock/cmake/gmock_main.pc.in | 9 + .../googletest/googlemock/configure.ac | 146 + .../googletest/googlemock/docs/CheatSheet.md | 562 ++ .../googletest/googlemock/docs/CookBook.md | 3679 ++++++++ .../googletest/googlemock/docs/DesignDoc.md | 280 + .../googletest/googlemock/docs/DevGuide.md | 132 + .../googlemock/docs/Documentation.md | 15 + .../googletest/googlemock/docs/ForDummies.md | 447 + .../docs/FrequentlyAskedQuestions.md | 628 ++ .../googletest/googlemock/docs/KnownIssues.md | 19 + .../googlemock/include/gmock/gmock-actions.h | 1205 +++ .../include/gmock/gmock-cardinalities.h | 147 + .../include/gmock/gmock-generated-actions.h | 2377 +++++ .../gmock/gmock-generated-actions.h.pump | 794 ++ .../gmock/gmock-generated-function-mockers.h | 1095 +++ .../gmock-generated-function-mockers.h.pump | 291 + .../include/gmock/gmock-generated-matchers.h | 2191 +++++ .../gmock/gmock-generated-matchers.h.pump | 673 ++ .../gmock/gmock-generated-nice-strict.h | 397 + .../gmock/gmock-generated-nice-strict.h.pump | 161 + .../googlemock/include/gmock/gmock-matchers.h | 4416 ++++++++++ .../include/gmock/gmock-more-actions.h | 246 + .../include/gmock/gmock-more-matchers.h | 58 + .../include/gmock/gmock-spec-builders.h | 1849 ++++ .../googlemock/include/gmock/gmock.h | 95 + .../internal/custom/gmock-generated-actions.h | 8 + 
.../custom/gmock-generated-actions.h.pump | 10 + .../gmock/internal/custom/gmock-matchers.h | 39 + .../gmock/internal/custom/gmock-port.h | 46 + .../internal/gmock-generated-internal-utils.h | 279 + .../gmock-generated-internal-utils.h.pump | 136 + .../gmock/internal/gmock-internal-utils.h | 510 ++ .../include/gmock/internal/gmock-port.h | 91 + .../googletest/googlemock/make/Makefile | 101 + .../googletest/googlemock/msvc/2005/gmock.sln | 32 + .../googlemock/msvc/2005/gmock.vcproj | 191 + .../googlemock/msvc/2005/gmock_config.vsprops | 15 + .../googlemock/msvc/2005/gmock_main.vcproj | 187 + .../googlemock/msvc/2005/gmock_test.vcproj | 201 + .../googletest/googlemock/msvc/2010/gmock.sln | 46 + .../googlemock/msvc/2010/gmock.vcxproj | 145 + .../googlemock/msvc/2010/gmock_config.props | 19 + .../googlemock/msvc/2010/gmock_main.vcxproj | 151 + .../googlemock/msvc/2010/gmock_test.vcxproj | 176 + .../googletest/googlemock/msvc/2015/gmock.sln | 46 + .../googlemock/msvc/2015/gmock.vcxproj | 145 + .../googlemock/msvc/2015/gmock_config.props | 19 + .../googlemock/msvc/2015/gmock_main.vcxproj | 151 + .../googlemock/msvc/2015/gmock_test.vcxproj | 176 + .../googlemock/scripts/fuse_gmock_files.py | 240 + .../googlemock/scripts/generator/LICENSE | 203 + .../googlemock/scripts/generator/README | 35 + .../scripts/generator/README.cppclean | 115 + .../scripts/generator/cpp/__init__.py | 0 .../googlemock/scripts/generator/cpp/ast.py | 1733 ++++ .../scripts/generator/cpp/gmock_class.py | 227 + .../scripts/generator/cpp/gmock_class_test.py | 448 + .../scripts/generator/cpp/keywords.py | 59 + .../scripts/generator/cpp/tokenize.py | 287 + .../googlemock/scripts/generator/cpp/utils.py | 41 + .../googlemock/scripts/generator/gmock_gen.py | 31 + .../googlemock/scripts/gmock-config.in | 303 + .../googlemock/scripts/gmock_doctor.py | 640 ++ .../googletest/googlemock/scripts/upload.py | 1387 +++ .../googlemock/scripts/upload_gmock.py | 78 + .../googletest/googlemock/src/gmock-all.cc | 47 + 
.../googlemock/src/gmock-cardinalities.cc | 156 + .../googlemock/src/gmock-internal-utils.cc | 173 + .../googlemock/src/gmock-matchers.cc | 497 ++ .../googlemock/src/gmock-spec-builders.cc | 830 ++ .../googletest/googlemock/src/gmock.cc | 205 + .../googletest/googlemock/src/gmock_main.cc | 54 + .../googletest/googlemock/test/BUILD.bazel | 54 + .../googlemock/test/gmock-actions_test.cc | 1411 +++ .../test/gmock-cardinalities_test.cc | 428 + .../test/gmock-generated-actions_test.cc | 1228 +++ .../gmock-generated-function-mockers_test.cc | 624 ++ .../gmock-generated-internal-utils_test.cc | 127 + .../test/gmock-generated-matchers_test.cc | 1286 +++ .../test/gmock-internal-utils_test.cc | 698 ++ .../googlemock/test/gmock-matchers_test.cc | 5686 ++++++++++++ .../test/gmock-more-actions_test.cc | 711 ++ .../googlemock/test/gmock-nice-strict_test.cc | 446 + .../googlemock/test/gmock-port_test.cc | 43 + .../test/gmock-spec-builders_test.cc | 2701 ++++++ .../googlemock/test/gmock_all_test.cc | 51 + .../googlemock/test/gmock_ex_test.cc | 81 + .../googlemock/test/gmock_leak_test.py | 108 + .../googlemock/test/gmock_leak_test_.cc | 100 + .../googlemock/test/gmock_link2_test.cc | 40 + .../googlemock/test/gmock_link_test.cc | 40 + .../googlemock/test/gmock_link_test.h | 669 ++ .../googlemock/test/gmock_output_test.py | 180 + .../googlemock/test/gmock_output_test_.cc | 291 + .../test/gmock_output_test_golden.txt | 310 + .../googlemock/test/gmock_stress_test.cc | 322 + .../googletest/googlemock/test/gmock_test.cc | 261 + .../googlemock/test/gmock_test_utils.py | 112 + tools/external/googletest/googletest/CHANGES | 157 + .../googletest/googletest/CMakeLists.txt | 312 + .../googletest/googletest/CONTRIBUTORS | 37 + tools/external/googletest/googletest/LICENSE | 28 + .../googletest/googletest/Makefile.am | 310 + .../external/googletest/googletest/README.md | 395 + .../googletest/googletest/build-aux/.keep | 0 .../googletest/googletest/cmake/gtest.pc.in | 9 + 
.../googletest/cmake/gtest_main.pc.in | 10 + .../googletest/cmake/internal_utils.cmake | 273 + .../googletest/codegear/gtest.cbproj | 138 + .../googletest/codegear/gtest.groupproj | 54 + .../googletest/codegear/gtest_all.cc | 38 + .../googletest/codegear/gtest_link.cc | 40 + .../googletest/codegear/gtest_main.cbproj | 82 + .../googletest/codegear/gtest_unittest.cbproj | 88 + .../googletest/googletest/configure.ac | 68 + .../googletest/docs/AdvancedGuide.md | 2182 +++++ .../googletest/googletest/docs/DevGuide.md | 130 + .../googletest/docs/Documentation.md | 16 + .../googletest/googletest/docs/FAQ.md | 1092 +++ .../googletest/googletest/docs/Pkgconfig.md | 146 + .../googletest/googletest/docs/Primer.md | 536 ++ .../googletest/googletest/docs/PumpManual.md | 177 + .../googletest/googletest/docs/Samples.md | 14 + .../googletest/googletest/docs/XcodeGuide.md | 93 + .../include/gtest/gtest-death-test.h | 294 + .../googletest/include/gtest/gtest-message.h | 250 + .../include/gtest/gtest-param-test.h | 1444 +++ .../include/gtest/gtest-param-test.h.pump | 510 ++ .../googletest/include/gtest/gtest-printers.h | 1010 +++ .../googletest/include/gtest/gtest-spi.h | 231 + .../include/gtest/gtest-test-part.h | 179 + .../include/gtest/gtest-typed-test.h | 264 + .../googletest/include/gtest/gtest.h | 2240 +++++ .../include/gtest/gtest_pred_impl.h | 358 + .../googletest/include/gtest/gtest_prod.h | 58 + .../gtest/internal/custom/gtest-port.h | 72 + .../gtest/internal/custom/gtest-printers.h | 42 + .../include/gtest/internal/custom/gtest.h | 45 + .../internal/gtest-death-test-internal.h | 319 + .../include/gtest/internal/gtest-filepath.h | 206 + .../include/gtest/internal/gtest-internal.h | 1264 +++ .../include/gtest/internal/gtest-linked_ptr.h | 243 + .../internal/gtest-param-util-generated.h | 5146 +++++++++++ .../gtest-param-util-generated.h.pump | 286 + .../include/gtest/internal/gtest-param-util.h | 730 ++ .../include/gtest/internal/gtest-port-arch.h | 97 + 
.../include/gtest/internal/gtest-port.h | 2604 ++++++ .../include/gtest/internal/gtest-string.h | 167 + .../include/gtest/internal/gtest-tuple.h | 1020 +++ .../include/gtest/internal/gtest-tuple.h.pump | 347 + .../include/gtest/internal/gtest-type-util.h | 3331 +++++++ .../gtest/internal/gtest-type-util.h.pump | 297 + .../googletest/googletest/m4/acx_pthread.m4 | 363 + .../googletest/googletest/m4/gtest.m4 | 74 + .../googletest/googletest/make/Makefile | 82 + .../googletest/msvc/2010/gtest-md.sln | 55 + .../googletest/msvc/2010/gtest-md.vcxproj | 149 + .../msvc/2010/gtest-md.vcxproj.filters | 18 + .../googletest/googletest/msvc/2010/gtest.sln | 55 + .../googletest/msvc/2010/gtest.vcxproj | 149 + .../msvc/2010/gtest.vcxproj.filters | 18 + .../msvc/2010/gtest_main-md.vcxproj | 154 + .../msvc/2010/gtest_main-md.vcxproj.filters | 18 + .../googletest/msvc/2010/gtest_main.vcxproj | 162 + .../msvc/2010/gtest_main.vcxproj.filters | 18 + .../msvc/2010/gtest_prod_test-md.vcxproj | 199 + .../2010/gtest_prod_test-md.vcxproj.filters | 26 + .../msvc/2010/gtest_prod_test.vcxproj | 191 + .../msvc/2010/gtest_prod_test.vcxproj.filters | 26 + .../msvc/2010/gtest_unittest-md.vcxproj | 188 + .../2010/gtest_unittest-md.vcxproj.filters | 18 + .../msvc/2010/gtest_unittest.vcxproj | 180 + .../msvc/2010/gtest_unittest.vcxproj.filters | 18 + .../googletest/samples/prime_tables.h | 123 + .../googletest/googletest/samples/sample1.cc | 68 + .../googletest/googletest/samples/sample1.h | 43 + .../googletest/samples/sample10_unittest.cc | 140 + .../googletest/samples/sample1_unittest.cc | 154 + .../googletest/googletest/samples/sample2.cc | 56 + .../googletest/googletest/samples/sample2.h | 85 + .../googletest/samples/sample2_unittest.cc | 110 + .../googletest/samples/sample3-inl.h | 172 + .../googletest/samples/sample3_unittest.cc | 152 + .../googletest/googletest/samples/sample4.cc | 46 + .../googletest/googletest/samples/sample4.h | 53 + .../googletest/samples/sample4_unittest.cc | 46 + 
.../googletest/samples/sample5_unittest.cc | 199 + .../googletest/samples/sample6_unittest.cc | 225 + .../googletest/samples/sample7_unittest.cc | 130 + .../googletest/samples/sample8_unittest.cc | 174 + .../googletest/samples/sample9_unittest.cc | 157 + .../googletest/googletest/scripts/common.py | 83 + .../googletest/scripts/fuse_gtest_files.py | 253 + .../googletest/scripts/gen_gtest_pred_impl.py | 730 ++ .../googletest/scripts/gtest-config.in | 274 + .../googletest/googletest/scripts/pump.py | 855 ++ .../googletest/scripts/release_docs.py | 158 + .../googletest/scripts/test/Makefile | 59 + .../googletest/googletest/scripts/upload.py | 1387 +++ .../googletest/scripts/upload_gtest.py | 78 + .../googletest/googletest/src/gtest-all.cc | 48 + .../googletest/src/gtest-death-test.cc | 1341 +++ .../googletest/src/gtest-filepath.cc | 387 + .../googletest/src/gtest-internal-inl.h | 1183 +++ .../googletest/googletest/src/gtest-port.cc | 1241 +++ .../googletest/src/gtest-printers.cc | 376 + .../googletest/src/gtest-test-part.cc | 110 + .../googletest/src/gtest-typed-test.cc | 118 + .../googletest/googletest/src/gtest.cc | 5431 ++++++++++++ .../googletest/googletest/src/gtest_main.cc | 38 + .../googletest/googletest/test/BUILD.bazel | 118 + .../test/gtest-death-test_ex_test.cc | 93 + .../googletest/test/gtest-death-test_test.cc | 1427 +++ .../googletest/test/gtest-filepath_test.cc | 662 ++ .../googletest/test/gtest-linked_ptr_test.cc | 154 + .../googletest/test/gtest-listener_test.cc | 311 + .../googletest/test/gtest-message_test.cc | 159 + .../googletest/test/gtest-options_test.cc | 215 + .../googletest/test/gtest-param-test2_test.cc | 65 + .../googletest/test/gtest-param-test_test.cc | 1055 +++ .../googletest/test/gtest-param-test_test.h | 57 + .../googletest/test/gtest-port_test.cc | 1311 +++ .../googletest/test/gtest-printers_test.cc | 1700 ++++ .../googletest/test/gtest-test-part_test.cc | 208 + .../googletest/test/gtest-tuple_test.cc | 320 + 
.../googletest/test/gtest-typed-test2_test.cc | 45 + .../googletest/test/gtest-typed-test_test.cc | 380 + .../googletest/test/gtest-typed-test_test.h | 66 + .../test/gtest-unittest-api_test.cc | 341 + .../googletest/test/gtest_all_test.cc | 47 + .../test/gtest_break_on_failure_unittest.py | 212 + .../test/gtest_break_on_failure_unittest_.cc | 88 + .../test/gtest_catch_exceptions_test.py | 237 + .../test/gtest_catch_exceptions_test_.cc | 311 + .../googletest/test/gtest_color_test.py | 130 + .../googletest/test/gtest_color_test_.cc | 71 + .../googletest/test/gtest_env_var_test.py | 117 + .../googletest/test/gtest_env_var_test_.cc | 126 + .../googletest/test/gtest_environment_test.cc | 192 + .../googletest/test/gtest_filter_unittest.py | 636 ++ .../googletest/test/gtest_filter_unittest_.cc | 140 + .../googletest/test/gtest_help_test.py | 172 + .../googletest/test/gtest_help_test_.cc | 46 + .../test/gtest_list_tests_unittest.py | 207 + .../test/gtest_list_tests_unittest_.cc | 157 + .../googletest/test/gtest_main_unittest.cc | 45 + .../googletest/test/gtest_no_test_unittest.cc | 56 + .../googletest/test/gtest_output_test.py | 340 + .../googletest/test/gtest_output_test_.cc | 1062 +++ .../test/gtest_output_test_golden_lin.txt | 761 ++ .../test/gtest_pred_impl_unittest.cc | 2427 ++++++ .../test/gtest_premature_exit_test.cc | 127 + .../googletest/test/gtest_prod_test.cc | 57 + .../googletest/test/gtest_repeat_test.cc | 253 + .../googletest/test/gtest_shuffle_test.py | 325 + .../googletest/test/gtest_shuffle_test_.cc | 103 + .../googletest/test/gtest_sole_header_test.cc | 57 + .../googletest/test/gtest_stress_test.cc | 256 + .../googletest/test/gtest_test_utils.py | 320 + .../test/gtest_throw_on_failure_ex_test.cc | 92 + .../test/gtest_throw_on_failure_test.py | 171 + .../test/gtest_throw_on_failure_test_.cc | 72 + .../test/gtest_uninitialized_test.py | 70 + .../test/gtest_uninitialized_test_.cc | 43 + .../googletest/test/gtest_unittest.cc | 7717 +++++++++++++++++ 
.../test/gtest_xml_outfile1_test_.cc | 49 + .../test/gtest_xml_outfile2_test_.cc | 49 + .../test/gtest_xml_outfiles_test.py | 132 + .../test/gtest_xml_output_unittest.py | 311 + .../test/gtest_xml_output_unittest_.cc | 181 + .../googletest/test/gtest_xml_test_utils.py | 194 + .../googletest/googletest/test/production.cc | 36 + .../googletest/googletest/test/production.h | 55 + .../xcode/Config/DebugProject.xcconfig | 30 + .../xcode/Config/FrameworkTarget.xcconfig | 17 + .../googletest/xcode/Config/General.xcconfig | 41 + .../xcode/Config/ReleaseProject.xcconfig | 32 + .../xcode/Config/StaticLibraryTarget.xcconfig | 18 + .../xcode/Config/TestTarget.xcconfig | 8 + .../googletest/xcode/Resources/Info.plist | 30 + .../xcode/Samples/FrameworkSample/Info.plist | 28 + .../WidgetFramework.xcodeproj/project.pbxproj | 457 + .../xcode/Samples/FrameworkSample/runtests.sh | 62 + .../xcode/Samples/FrameworkSample/widget.cc | 63 + .../xcode/Samples/FrameworkSample/widget.h | 59 + .../Samples/FrameworkSample/widget_test.cc | 68 + .../googletest/xcode/Scripts/runtests.sh | 65 + .../xcode/Scripts/versiongenerate.py | 100 + .../xcode/gtest.xcodeproj/project.pbxproj | 1182 +++ tools/nvrtc/CMakeLists.txt | 101 + tools/nvrtc/cutlass/nvrtc/environment.h | 36 + tools/nvrtc/stdlib/assert.h | 0 tools/nvrtc/stdlib/stdint.h | 35 + tools/test/CMakeLists.txt | 24 + tools/test/perf/CMakeLists.txt | 59 + tools/test/perf/cutlass_perf_test.cpp | 76 + tools/test/perf/gemm/cublas_dispatch.h | 92 + tools/test/perf/gemm/cutlass_dispatch.h | 148 + tools/test/perf/gemm/dgemm.cu | 97 + tools/test/perf/gemm/gemm_perf_testbed.h | 624 ++ tools/test/perf/gemm/gemm_profiler.h | 343 + tools/test/perf/gemm/hgemm.cu | 113 + tools/test/perf/gemm/igemm.cu | 95 + tools/test/perf/gemm/sgemm.cu | 101 + tools/test/perf/gemm/wmma_gemm.cu | 173 + tools/test/perf/performance_result.h | 229 + tools/test/perf/testbench_options.h | 583 ++ tools/test/perf/testbench_output.h | 159 + tools/test/unit/CMakeLists.txt | 96 + 
tools/test/unit/core/layout_verification.cu | 194 + tools/test/unit/core/layout_verification.h | 314 + tools/test/unit/core/predicate_vector.cu | 120 + tools/test/unit/core/tile_iterator.cu | 153 + tools/test/unit/cutlass_unit_test.cpp | 34 + tools/test/unit/cutlass_unit_test.h | 30 + tools/test/unit/gemm/dgemm.cu | 340 + tools/test/unit/gemm/gemm.h | 90 + tools/test/unit/gemm/gemm_nvrtc.cu | 65 + tools/test/unit/gemm/gemm_nvrtc.h | 190 + .../test/unit/gemm/gemm_shared_mem_layouts.cu | 621 ++ tools/test/unit/gemm/gemm_testbed.h | 530 ++ tools/test/unit/gemm/hgemm_128x128x8.cu | 388 + tools/test/unit/gemm/hgemm_128x32x8.cu | 314 + tools/test/unit/gemm/hgemm_128x64x8.cu | 314 + tools/test/unit/gemm/igemm_128x128x32.cu | 351 + .../test/unit/gemm/igemm_128x128x32_float.cu | 352 + tools/test/unit/gemm/igemm_128x128x32_int8.cu | 351 + tools/test/unit/gemm/igemm_128x32x32.cu | 351 + tools/test/unit/gemm/igemm_128x64x32.cu | 351 + tools/test/unit/gemm/sgemm_128x128x8.cu | 373 + tools/test/unit/gemm/sgemm_128x32x8.cu | 332 + tools/test/unit/gemm/sgemm_128x64x8.cu | 356 + tools/test/unit/gemm/sgemm_64x128x8.cu | 43 + tools/test/unit/gemm/sgemm_64x32x8.cu | 324 + tools/test/unit/gemm/sgemm_64x64x8.cu | 332 + tools/test/unit/gemm/wmma_gemm.cu | 280 + tools/test/unit/util/host_tensor.cu | 66 + tools/util/command_line.h | 254 + tools/util/device_memory.h | 178 + tools/util/exceptions.h | 62 + tools/util/half.h | 743 ++ tools/util/host_tensor.h | 362 + tools/util/host_tensor_view.h | 542 ++ tools/util/tensor_view_io.h | 61 + tools/util/type_traits.h | 161 + 1830 files changed, 308981 insertions(+), 11161 deletions(-) create mode 100644 CMake/bin2hex.cmake create mode 100644 CMakeLists.txt create mode 100755 clang-format.sh delete mode 100644 common.mk create mode 100644 cutlass/convert.h create mode 100644 cutlass/coord.h create mode 100644 cutlass/core_io.h create mode 100644 cutlass/cutlass.h create mode 100644 cutlass/fragment.h create mode 100644 cutlass/fragment_load_store.h 
create mode 100644 cutlass/fragment_multiply_add.h delete mode 100644 cutlass/gemm/block_loader.h delete mode 100644 cutlass/gemm/block_loader_congruous_dp1.h delete mode 100644 cutlass/gemm/block_loader_congruous_idp4.h delete mode 100644 cutlass/gemm/block_loader_crosswise.h delete mode 100644 cutlass/gemm/block_loader_wmma.h delete mode 100644 cutlass/gemm/block_task.h delete mode 100644 cutlass/gemm/block_task_wmma.h create mode 100644 cutlass/gemm/clear_accumulators.h create mode 100644 cutlass/gemm/dgemm_traits.h delete mode 100644 cutlass/gemm/dispatch.h delete mode 100644 cutlass/gemm/dispatch_policies.h delete mode 100644 cutlass/gemm/dp_accummulate.h delete mode 100644 cutlass/gemm/epilogue_function.h create mode 100644 cutlass/gemm/gemm.h create mode 100644 cutlass/gemm/gemm_epilogue.h create mode 100644 cutlass/gemm/gemm_epilogue_traits.h create mode 100644 cutlass/gemm/gemm_global_stream.h create mode 100644 cutlass/gemm/gemm_global_tile.h create mode 100644 cutlass/gemm/gemm_operand.h create mode 100644 cutlass/gemm/gemm_shared_stream.h create mode 100644 cutlass/gemm/gemm_shared_tile.h create mode 100644 cutlass/gemm/gemm_traits.h delete mode 100644 cutlass/gemm/grid_raster.h create mode 100644 cutlass/gemm/hgemm_global_tile.h create mode 100644 cutlass/gemm/hgemm_multiply_add.h create mode 100644 cutlass/gemm/hgemm_swizzle.h create mode 100644 cutlass/gemm/hgemm_traits.h create mode 100644 cutlass/gemm/identity_block_swizzle.h create mode 100644 cutlass/gemm/igemm_epilogue.h create mode 100644 cutlass/gemm/igemm_global_tile.h create mode 100644 cutlass/gemm/igemm_multiply_add.h create mode 100644 cutlass/gemm/igemm_swizzle.h create mode 100644 cutlass/gemm/igemm_traits.h delete mode 100644 cutlass/gemm/k_split_control.h create mode 100644 cutlass/gemm/linear_scaling.h create mode 100644 cutlass/gemm/sgemm_traits.h delete mode 100644 cutlass/gemm/thread_accumulator.h create mode 100644 cutlass/gemm/thread_multiply_add.h delete mode 100644 
cutlass/gemm/wmma_accumulator.h create mode 100644 cutlass/gemm/wmma_gemm_epilogue_traits.h create mode 100644 cutlass/gemm/wmma_gemm_global_tile.h create mode 100644 cutlass/gemm/wmma_gemm_multiply_add.h create mode 100644 cutlass/gemm/wmma_gemm_shared_tile.h create mode 100644 cutlass/gemm/wmma_gemm_traits.h create mode 100644 cutlass/iterator_access.h create mode 100644 cutlass/load_store.h create mode 100644 cutlass/matrix_traits.h create mode 100644 cutlass/predicate_vector.h create mode 100644 cutlass/reshape_tile.h create mode 100644 cutlass/shape.h create mode 100644 cutlass/tensor_ref.h create mode 100644 cutlass/tensor_view.h create mode 100644 cutlass/tile_iterator.h create mode 100644 cutlass/tile_traits_standard.h create mode 100644 cutlass/util/cutlass_math.h delete mode 100644 cutlass/util/device_introspection.h delete mode 100644 cutlass/util/io_intrinsics.h delete mode 100644 cutlass/util/math.h delete mode 100644 cutlass/util/matrix_transform.h delete mode 100644 cutlass/util/nv_std.h create mode 100644 cutlass/util/platform.h delete mode 100644 cutlass/util/printable.h delete mode 100644 cutlass/util/util.h create mode 100644 cutlass/vector.h create mode 100644 cutlass/wmma_matrix.h delete mode 100644 cutlass_test/.gitignore delete mode 100644 cutlass_test/Makefile delete mode 100644 cutlass_test/cublas_dispatch.h delete mode 100644 cutlass_test/cutlass_dispatch.h delete mode 100644 cutlass_test/gemm.cu delete mode 100644 cutlass_test/util/command_line.h delete mode 100644 cutlass_test/util/exceptions.h delete mode 100644 cutlass_test/util/half.h delete mode 100644 cutlass_test/util/matrix.h delete mode 100644 cutlass_test/util/timer.h delete mode 100644 cutlass_test/util/type_conversion.h create mode 100644 docs/generated-html/annotated.html create mode 100644 docs/generated-html/bc_s.png create mode 100644 docs/generated-html/bdwn.png create mode 100644 docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator-members.html create 
mode 100644 docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html create mode 100644 docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html create mode 100644 docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html create mode 100644 docs/generated-html/classcutlass_1_1TensorRef-members.html create mode 100644 docs/generated-html/classcutlass_1_1TensorRef.html create mode 100644 docs/generated-html/classcutlass_1_1TensorView-members.html create mode 100644 docs/generated-html/classcutlass_1_1TensorView.html create mode 100644 docs/generated-html/classcutlass_1_1TensorView.png create mode 100644 docs/generated-html/classcutlass_1_1platform_1_1unique__ptr-members.html create mode 100644 docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html create mode 100644 docs/generated-html/classes.html create mode 100644 docs/generated-html/classnv__std_1_1unique__ptr-members.html create mode 100644 docs/generated-html/classnv__std_1_1unique__ptr.html create mode 100644 docs/generated-html/clear__accumulators_8h.html create mode 100644 docs/generated-html/clear__accumulators_8h_source.html create mode 100644 docs/generated-html/closed.png create mode 100644 docs/generated-html/convert_8h.html create mode 100644 docs/generated-html/convert_8h_source.html create mode 100644 docs/generated-html/coord_8h.html create mode 100644 docs/generated-html/coord_8h_source.html create mode 100644 docs/generated-html/core__io_8h.html create mode 100644 docs/generated-html/core__io_8h_source.html create mode 100644 docs/generated-html/cutlass_8h.html create mode 100644 docs/generated-html/cutlass_8h_source.html create mode 100644 docs/generated-html/cutlass__math_8h.html create mode 100644 docs/generated-html/cutlass__math_8h_source.html create mode 100644 docs/generated-html/debug_8h.html create mode 100644 docs/generated-html/debug_8h_source.html create mode 100644 docs/generated-html/dgemm__traits_8h.html create mode 100644 
docs/generated-html/dgemm__traits_8h_source.html create mode 100644 docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html create mode 100644 docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html create mode 100644 docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html create mode 100644 docs/generated-html/doc.png create mode 100644 docs/generated-html/doxygen.css create mode 100644 docs/generated-html/doxygen.png create mode 100644 docs/generated-html/dynsections.js create mode 100644 docs/generated-html/files.html create mode 100644 docs/generated-html/folderclosed.png create mode 100644 docs/generated-html/folderopen.png create mode 100644 docs/generated-html/fragment_8h.html create mode 100644 docs/generated-html/fragment_8h_source.html create mode 100644 docs/generated-html/fragment__load__store_8h.html create mode 100644 docs/generated-html/fragment__load__store_8h_source.html create mode 100644 docs/generated-html/fragment__multiply__add_8h.html create mode 100644 docs/generated-html/fragment__multiply__add_8h_source.html create mode 100644 docs/generated-html/fragment__stream_8h.html create mode 100644 docs/generated-html/fragment__stream_8h_source.html create mode 100644 docs/generated-html/functions.html create mode 100644 docs/generated-html/functions_0x7e.html create mode 100644 docs/generated-html/functions_b.html create mode 100644 docs/generated-html/functions_c.html create mode 100644 docs/generated-html/functions_d.html create mode 100644 docs/generated-html/functions_e.html create mode 100644 docs/generated-html/functions_enum.html create mode 100644 docs/generated-html/functions_eval.html create mode 100644 docs/generated-html/functions_f.html create mode 100644 docs/generated-html/functions_func.html create mode 100644 docs/generated-html/functions_func_0x7e.html create mode 100644 docs/generated-html/functions_func_b.html create mode 100644 docs/generated-html/functions_func_c.html create mode 100644 
docs/generated-html/functions_func_d.html create mode 100644 docs/generated-html/functions_func_e.html create mode 100644 docs/generated-html/functions_func_f.html create mode 100644 docs/generated-html/functions_func_g.html create mode 100644 docs/generated-html/functions_func_h.html create mode 100644 docs/generated-html/functions_func_i.html create mode 100644 docs/generated-html/functions_func_l.html create mode 100644 docs/generated-html/functions_func_m.html create mode 100644 docs/generated-html/functions_func_o.html create mode 100644 docs/generated-html/functions_func_p.html create mode 100644 docs/generated-html/functions_func_r.html create mode 100644 docs/generated-html/functions_func_s.html create mode 100644 docs/generated-html/functions_func_t.html create mode 100644 docs/generated-html/functions_func_u.html create mode 100644 docs/generated-html/functions_func_v.html create mode 100644 docs/generated-html/functions_func_w.html create mode 100644 docs/generated-html/functions_g.html create mode 100644 docs/generated-html/functions_h.html create mode 100644 docs/generated-html/functions_i.html create mode 100644 docs/generated-html/functions_k.html create mode 100644 docs/generated-html/functions_l.html create mode 100644 docs/generated-html/functions_m.html create mode 100644 docs/generated-html/functions_n.html create mode 100644 docs/generated-html/functions_o.html create mode 100644 docs/generated-html/functions_p.html create mode 100644 docs/generated-html/functions_r.html create mode 100644 docs/generated-html/functions_s.html create mode 100644 docs/generated-html/functions_t.html create mode 100644 docs/generated-html/functions_type.html create mode 100644 docs/generated-html/functions_type_b.html create mode 100644 docs/generated-html/functions_type_c.html create mode 100644 docs/generated-html/functions_type_d.html create mode 100644 docs/generated-html/functions_type_e.html create mode 100644 docs/generated-html/functions_type_f.html create 
mode 100644 docs/generated-html/functions_type_g.html create mode 100644 docs/generated-html/functions_type_i.html create mode 100644 docs/generated-html/functions_type_l.html create mode 100644 docs/generated-html/functions_type_m.html create mode 100644 docs/generated-html/functions_type_n.html create mode 100644 docs/generated-html/functions_type_o.html create mode 100644 docs/generated-html/functions_type_p.html create mode 100644 docs/generated-html/functions_type_s.html create mode 100644 docs/generated-html/functions_type_t.html create mode 100644 docs/generated-html/functions_type_v.html create mode 100644 docs/generated-html/functions_type_w.html create mode 100644 docs/generated-html/functions_type_y.html create mode 100644 docs/generated-html/functions_u.html create mode 100644 docs/generated-html/functions_v.html create mode 100644 docs/generated-html/functions_vars.html create mode 100644 docs/generated-html/functions_vars_b.html create mode 100644 docs/generated-html/functions_vars_c.html create mode 100644 docs/generated-html/functions_vars_d.html create mode 100644 docs/generated-html/functions_vars_e.html create mode 100644 docs/generated-html/functions_vars_f.html create mode 100644 docs/generated-html/functions_vars_g.html create mode 100644 docs/generated-html/functions_vars_i.html create mode 100644 docs/generated-html/functions_vars_k.html create mode 100644 docs/generated-html/functions_vars_l.html create mode 100644 docs/generated-html/functions_vars_m.html create mode 100644 docs/generated-html/functions_vars_n.html create mode 100644 docs/generated-html/functions_vars_p.html create mode 100644 docs/generated-html/functions_vars_r.html create mode 100644 docs/generated-html/functions_vars_s.html create mode 100644 docs/generated-html/functions_vars_t.html create mode 100644 docs/generated-html/functions_vars_v.html create mode 100644 docs/generated-html/functions_w.html create mode 100644 docs/generated-html/functions_y.html create mode 
100644 docs/generated-html/gemm_8h.html create mode 100644 docs/generated-html/gemm_8h_source.html create mode 100644 docs/generated-html/gemm__epilogue_8h.html create mode 100644 docs/generated-html/gemm__epilogue_8h_source.html create mode 100644 docs/generated-html/gemm__epilogue__traits_8h.html create mode 100644 docs/generated-html/gemm__epilogue__traits_8h_source.html create mode 100644 docs/generated-html/gemm__fragment__stream_8h.html create mode 100644 docs/generated-html/gemm__fragment__stream_8h_source.html create mode 100644 docs/generated-html/gemm__global__stream_8h.html create mode 100644 docs/generated-html/gemm__global__stream_8h_source.html create mode 100644 docs/generated-html/gemm__global__tile_8h.html create mode 100644 docs/generated-html/gemm__global__tile_8h_source.html create mode 100644 docs/generated-html/gemm__operand_8h.html create mode 100644 docs/generated-html/gemm__operand_8h_source.html create mode 100644 docs/generated-html/gemm__shared__stream_8h.html create mode 100644 docs/generated-html/gemm__shared__stream_8h_source.html create mode 100644 docs/generated-html/gemm__shared__tile_8h.html create mode 100644 docs/generated-html/gemm__shared__tile_8h_source.html create mode 100644 docs/generated-html/gemm__traits_8h.html create mode 100644 docs/generated-html/gemm__traits_8h_source.html create mode 100644 docs/generated-html/globals.html create mode 100644 docs/generated-html/globals_defs.html create mode 100644 docs/generated-html/globals_func.html create mode 100644 docs/generated-html/group__fragment__concept.html create mode 100644 docs/generated-html/group__fragment__iterator__concept.html create mode 100644 docs/generated-html/group__layout__concept.html create mode 100644 docs/generated-html/group__predicate__iterator__concept.html create mode 100644 docs/generated-html/group__predicate__tile__adapter.html create mode 100644 docs/generated-html/group__predicate__vector__concept.html create mode 100644 
docs/generated-html/group__tile__load__iterator__concept.html create mode 100644 docs/generated-html/group__tile__store__iterator__concept.html create mode 100644 docs/generated-html/group__tile__traits__concept.html create mode 100644 docs/generated-html/hgemm__global__tile_8h.html create mode 100644 docs/generated-html/hgemm__global__tile_8h_source.html create mode 100644 docs/generated-html/hgemm__multiply__add_8h.html create mode 100644 docs/generated-html/hgemm__multiply__add_8h_source.html create mode 100644 docs/generated-html/hgemm__swizzle_8h.html create mode 100644 docs/generated-html/hgemm__swizzle_8h_source.html create mode 100644 docs/generated-html/hgemm__traits_8h.html create mode 100644 docs/generated-html/hgemm__traits_8h_source.html create mode 100644 docs/generated-html/hierarchy.html create mode 100644 docs/generated-html/identity__block__swizzle_8h.html create mode 100644 docs/generated-html/identity__block__swizzle_8h_source.html create mode 100644 docs/generated-html/igemm__epilogue_8h.html create mode 100644 docs/generated-html/igemm__epilogue_8h_source.html create mode 100644 docs/generated-html/igemm__global__tile_8h.html create mode 100644 docs/generated-html/igemm__global__tile_8h_source.html create mode 100644 docs/generated-html/igemm__multiply__add_8h.html create mode 100644 docs/generated-html/igemm__multiply__add_8h_source.html create mode 100644 docs/generated-html/igemm__swizzle_8h.html create mode 100644 docs/generated-html/igemm__swizzle_8h_source.html create mode 100644 docs/generated-html/igemm__traits_8h.html create mode 100644 docs/generated-html/igemm__traits_8h_source.html create mode 100644 docs/generated-html/index.html create mode 100644 docs/generated-html/iterator__access_8h.html create mode 100644 docs/generated-html/iterator__access_8h_source.html create mode 100644 docs/generated-html/jquery.js create mode 100644 docs/generated-html/linear__scaling_8h.html create mode 100644 
docs/generated-html/linear__scaling_8h_source.html create mode 100644 docs/generated-html/load__store_8h.html create mode 100644 docs/generated-html/load__store_8h_source.html create mode 100644 docs/generated-html/matrix__traits_8h.html create mode 100644 docs/generated-html/matrix__traits_8h_source.html create mode 100644 docs/generated-html/menu.js create mode 100644 docs/generated-html/menudata.js create mode 100644 docs/generated-html/modules.html create mode 100644 docs/generated-html/namespacecutlass.html create mode 100644 docs/generated-html/namespacecutlass_1_1gemm.html create mode 100644 docs/generated-html/namespacecutlass_1_1platform.html create mode 100644 docs/generated-html/namespacemembers.html create mode 100644 docs/generated-html/namespacemembers_func.html create mode 100644 docs/generated-html/namespacemembers_type.html create mode 100644 docs/generated-html/namespacenv__std.html create mode 100644 docs/generated-html/namespaces.html create mode 100644 docs/generated-html/nav_f.png create mode 100644 docs/generated-html/nav_g.png create mode 100644 docs/generated-html/nav_h.png create mode 100644 docs/generated-html/nv__std_8h.html create mode 100644 docs/generated-html/nv__std_8h_source.html create mode 100644 docs/generated-html/open.png create mode 100644 docs/generated-html/platform_8h.html create mode 100644 docs/generated-html/platform_8h_source.html create mode 100644 docs/generated-html/predicate__vector_8h.html create mode 100644 docs/generated-html/predicate__vector_8h_source.html create mode 100644 docs/generated-html/reshape__tile_8h.html create mode 100644 docs/generated-html/reshape__tile_8h_source.html create mode 100644 docs/generated-html/search/all_0.html create mode 100644 docs/generated-html/search/all_0.js create mode 100644 docs/generated-html/search/all_1.html create mode 100644 docs/generated-html/search/all_1.js create mode 100644 docs/generated-html/search/all_10.html create mode 100644 
docs/generated-html/search/all_10.js create mode 100644 docs/generated-html/search/all_11.html create mode 100644 docs/generated-html/search/all_11.js create mode 100644 docs/generated-html/search/all_12.html create mode 100644 docs/generated-html/search/all_12.js create mode 100644 docs/generated-html/search/all_13.html create mode 100644 docs/generated-html/search/all_13.js create mode 100644 docs/generated-html/search/all_14.html create mode 100644 docs/generated-html/search/all_14.js create mode 100644 docs/generated-html/search/all_15.html create mode 100644 docs/generated-html/search/all_15.js create mode 100644 docs/generated-html/search/all_16.html create mode 100644 docs/generated-html/search/all_16.js create mode 100644 docs/generated-html/search/all_17.html create mode 100644 docs/generated-html/search/all_17.js create mode 100644 docs/generated-html/search/all_2.html create mode 100644 docs/generated-html/search/all_2.js create mode 100644 docs/generated-html/search/all_3.html create mode 100644 docs/generated-html/search/all_3.js create mode 100644 docs/generated-html/search/all_4.html create mode 100644 docs/generated-html/search/all_4.js create mode 100644 docs/generated-html/search/all_5.html create mode 100644 docs/generated-html/search/all_5.js create mode 100644 docs/generated-html/search/all_6.html create mode 100644 docs/generated-html/search/all_6.js create mode 100644 docs/generated-html/search/all_7.html create mode 100644 docs/generated-html/search/all_7.js create mode 100644 docs/generated-html/search/all_8.html create mode 100644 docs/generated-html/search/all_8.js create mode 100644 docs/generated-html/search/all_9.html create mode 100644 docs/generated-html/search/all_9.js create mode 100644 docs/generated-html/search/all_a.html create mode 100644 docs/generated-html/search/all_a.js create mode 100644 docs/generated-html/search/all_b.html create mode 100644 docs/generated-html/search/all_b.js create mode 100644 
docs/generated-html/search/all_c.html create mode 100644 docs/generated-html/search/all_c.js create mode 100644 docs/generated-html/search/all_d.html create mode 100644 docs/generated-html/search/all_d.js create mode 100644 docs/generated-html/search/all_e.html create mode 100644 docs/generated-html/search/all_e.js create mode 100644 docs/generated-html/search/all_f.html create mode 100644 docs/generated-html/search/all_f.js create mode 100644 docs/generated-html/search/classes_0.html create mode 100644 docs/generated-html/search/classes_0.js create mode 100644 docs/generated-html/search/classes_1.html create mode 100644 docs/generated-html/search/classes_1.js create mode 100644 docs/generated-html/search/classes_10.html create mode 100644 docs/generated-html/search/classes_10.js create mode 100644 docs/generated-html/search/classes_11.html create mode 100644 docs/generated-html/search/classes_11.js create mode 100644 docs/generated-html/search/classes_12.html create mode 100644 docs/generated-html/search/classes_12.js create mode 100644 docs/generated-html/search/classes_2.html create mode 100644 docs/generated-html/search/classes_2.js create mode 100644 docs/generated-html/search/classes_3.html create mode 100644 docs/generated-html/search/classes_3.js create mode 100644 docs/generated-html/search/classes_4.html create mode 100644 docs/generated-html/search/classes_4.js create mode 100644 docs/generated-html/search/classes_5.html create mode 100644 docs/generated-html/search/classes_5.js create mode 100644 docs/generated-html/search/classes_6.html create mode 100644 docs/generated-html/search/classes_6.js create mode 100644 docs/generated-html/search/classes_7.html create mode 100644 docs/generated-html/search/classes_7.js create mode 100644 docs/generated-html/search/classes_8.html create mode 100644 docs/generated-html/search/classes_8.js create mode 100644 docs/generated-html/search/classes_9.html create mode 100644 docs/generated-html/search/classes_9.js 
create mode 100644 docs/generated-html/search/classes_a.html create mode 100644 docs/generated-html/search/classes_a.js create mode 100644 docs/generated-html/search/classes_b.html create mode 100644 docs/generated-html/search/classes_b.js create mode 100644 docs/generated-html/search/classes_c.html create mode 100644 docs/generated-html/search/classes_c.js create mode 100644 docs/generated-html/search/classes_d.html create mode 100644 docs/generated-html/search/classes_d.js create mode 100644 docs/generated-html/search/classes_e.html create mode 100644 docs/generated-html/search/classes_e.js create mode 100644 docs/generated-html/search/classes_f.html create mode 100644 docs/generated-html/search/classes_f.js create mode 100644 docs/generated-html/search/close.png create mode 100644 docs/generated-html/search/defines_0.html create mode 100644 docs/generated-html/search/defines_0.js create mode 100644 docs/generated-html/search/defines_1.html create mode 100644 docs/generated-html/search/defines_1.js create mode 100644 docs/generated-html/search/defines_2.html create mode 100644 docs/generated-html/search/defines_2.js create mode 100644 docs/generated-html/search/defines_3.html create mode 100644 docs/generated-html/search/defines_3.js create mode 100644 docs/generated-html/search/enums_0.html create mode 100644 docs/generated-html/search/enums_0.js create mode 100644 docs/generated-html/search/enumvalues_0.html create mode 100644 docs/generated-html/search/enumvalues_0.js create mode 100644 docs/generated-html/search/enumvalues_1.html create mode 100644 docs/generated-html/search/enumvalues_1.js create mode 100644 docs/generated-html/search/enumvalues_2.html create mode 100644 docs/generated-html/search/enumvalues_2.js create mode 100644 docs/generated-html/search/enumvalues_3.html create mode 100644 docs/generated-html/search/enumvalues_3.js create mode 100644 docs/generated-html/search/files_0.html create mode 100644 docs/generated-html/search/files_0.js create 
mode 100644 docs/generated-html/search/files_1.html create mode 100644 docs/generated-html/search/files_1.js create mode 100644 docs/generated-html/search/files_2.html create mode 100644 docs/generated-html/search/files_2.js create mode 100644 docs/generated-html/search/files_3.html create mode 100644 docs/generated-html/search/files_3.js create mode 100644 docs/generated-html/search/files_4.html create mode 100644 docs/generated-html/search/files_4.js create mode 100644 docs/generated-html/search/files_5.html create mode 100644 docs/generated-html/search/files_5.js create mode 100644 docs/generated-html/search/files_6.html create mode 100644 docs/generated-html/search/files_6.js create mode 100644 docs/generated-html/search/files_7.html create mode 100644 docs/generated-html/search/files_7.js create mode 100644 docs/generated-html/search/files_8.html create mode 100644 docs/generated-html/search/files_8.js create mode 100644 docs/generated-html/search/files_9.html create mode 100644 docs/generated-html/search/files_9.js create mode 100644 docs/generated-html/search/files_a.html create mode 100644 docs/generated-html/search/files_a.js create mode 100644 docs/generated-html/search/files_b.html create mode 100644 docs/generated-html/search/files_b.js create mode 100644 docs/generated-html/search/files_c.html create mode 100644 docs/generated-html/search/files_c.js create mode 100644 docs/generated-html/search/files_d.html create mode 100644 docs/generated-html/search/files_d.js create mode 100644 docs/generated-html/search/files_e.html create mode 100644 docs/generated-html/search/files_e.js create mode 100644 docs/generated-html/search/functions_0.html create mode 100644 docs/generated-html/search/functions_0.js create mode 100644 docs/generated-html/search/functions_1.html create mode 100644 docs/generated-html/search/functions_1.js create mode 100644 docs/generated-html/search/functions_10.html create mode 100644 docs/generated-html/search/functions_10.js create 
mode 100644 docs/generated-html/search/functions_11.html create mode 100644 docs/generated-html/search/functions_11.js create mode 100644 docs/generated-html/search/functions_12.html create mode 100644 docs/generated-html/search/functions_12.js create mode 100644 docs/generated-html/search/functions_13.html create mode 100644 docs/generated-html/search/functions_13.js create mode 100644 docs/generated-html/search/functions_14.html create mode 100644 docs/generated-html/search/functions_14.js create mode 100644 docs/generated-html/search/functions_2.html create mode 100644 docs/generated-html/search/functions_2.js create mode 100644 docs/generated-html/search/functions_3.html create mode 100644 docs/generated-html/search/functions_3.js create mode 100644 docs/generated-html/search/functions_4.html create mode 100644 docs/generated-html/search/functions_4.js create mode 100644 docs/generated-html/search/functions_5.html create mode 100644 docs/generated-html/search/functions_5.js create mode 100644 docs/generated-html/search/functions_6.html create mode 100644 docs/generated-html/search/functions_6.js create mode 100644 docs/generated-html/search/functions_7.html create mode 100644 docs/generated-html/search/functions_7.js create mode 100644 docs/generated-html/search/functions_8.html create mode 100644 docs/generated-html/search/functions_8.js create mode 100644 docs/generated-html/search/functions_9.html create mode 100644 docs/generated-html/search/functions_9.js create mode 100644 docs/generated-html/search/functions_a.html create mode 100644 docs/generated-html/search/functions_a.js create mode 100644 docs/generated-html/search/functions_b.html create mode 100644 docs/generated-html/search/functions_b.js create mode 100644 docs/generated-html/search/functions_c.html create mode 100644 docs/generated-html/search/functions_c.js create mode 100644 docs/generated-html/search/functions_d.html create mode 100644 docs/generated-html/search/functions_d.js create mode 
100644 docs/generated-html/search/functions_e.html create mode 100644 docs/generated-html/search/functions_e.js create mode 100644 docs/generated-html/search/functions_f.html create mode 100644 docs/generated-html/search/functions_f.js create mode 100644 docs/generated-html/search/groups_0.html create mode 100644 docs/generated-html/search/groups_0.js create mode 100644 docs/generated-html/search/groups_1.html create mode 100644 docs/generated-html/search/groups_1.js create mode 100644 docs/generated-html/search/groups_2.html create mode 100644 docs/generated-html/search/groups_2.js create mode 100644 docs/generated-html/search/groups_3.html create mode 100644 docs/generated-html/search/groups_3.js create mode 100644 docs/generated-html/search/mag_sel.png create mode 100644 docs/generated-html/search/namespaces_0.html create mode 100644 docs/generated-html/search/namespaces_0.js create mode 100644 docs/generated-html/search/namespaces_1.html create mode 100644 docs/generated-html/search/namespaces_1.js create mode 100644 docs/generated-html/search/nomatches.html create mode 100644 docs/generated-html/search/search.css create mode 100644 docs/generated-html/search/search.js create mode 100644 docs/generated-html/search/search_l.png create mode 100644 docs/generated-html/search/search_m.png create mode 100644 docs/generated-html/search/search_r.png create mode 100644 docs/generated-html/search/searchdata.js create mode 100644 docs/generated-html/search/typedefs_0.html create mode 100644 docs/generated-html/search/typedefs_0.js create mode 100644 docs/generated-html/search/typedefs_1.html create mode 100644 docs/generated-html/search/typedefs_1.js create mode 100644 docs/generated-html/search/typedefs_10.html create mode 100644 docs/generated-html/search/typedefs_10.js create mode 100644 docs/generated-html/search/typedefs_11.html create mode 100644 docs/generated-html/search/typedefs_11.js create mode 100644 docs/generated-html/search/typedefs_2.html create mode 
100644 docs/generated-html/search/typedefs_2.js create mode 100644 docs/generated-html/search/typedefs_3.html create mode 100644 docs/generated-html/search/typedefs_3.js create mode 100644 docs/generated-html/search/typedefs_4.html create mode 100644 docs/generated-html/search/typedefs_4.js create mode 100644 docs/generated-html/search/typedefs_5.html create mode 100644 docs/generated-html/search/typedefs_5.js create mode 100644 docs/generated-html/search/typedefs_6.html create mode 100644 docs/generated-html/search/typedefs_6.js create mode 100644 docs/generated-html/search/typedefs_7.html create mode 100644 docs/generated-html/search/typedefs_7.js create mode 100644 docs/generated-html/search/typedefs_8.html create mode 100644 docs/generated-html/search/typedefs_8.js create mode 100644 docs/generated-html/search/typedefs_9.html create mode 100644 docs/generated-html/search/typedefs_9.js create mode 100644 docs/generated-html/search/typedefs_a.html create mode 100644 docs/generated-html/search/typedefs_a.js create mode 100644 docs/generated-html/search/typedefs_b.html create mode 100644 docs/generated-html/search/typedefs_b.js create mode 100644 docs/generated-html/search/typedefs_c.html create mode 100644 docs/generated-html/search/typedefs_c.js create mode 100644 docs/generated-html/search/typedefs_d.html create mode 100644 docs/generated-html/search/typedefs_d.js create mode 100644 docs/generated-html/search/typedefs_e.html create mode 100644 docs/generated-html/search/typedefs_e.js create mode 100644 docs/generated-html/search/typedefs_f.html create mode 100644 docs/generated-html/search/typedefs_f.js create mode 100644 docs/generated-html/search/variables_0.html create mode 100644 docs/generated-html/search/variables_0.js create mode 100644 docs/generated-html/search/variables_1.html create mode 100644 docs/generated-html/search/variables_1.js create mode 100644 docs/generated-html/search/variables_10.html create mode 100644 
docs/generated-html/search/variables_10.js create mode 100644 docs/generated-html/search/variables_2.html create mode 100644 docs/generated-html/search/variables_2.js create mode 100644 docs/generated-html/search/variables_3.html create mode 100644 docs/generated-html/search/variables_3.js create mode 100644 docs/generated-html/search/variables_4.html create mode 100644 docs/generated-html/search/variables_4.js create mode 100644 docs/generated-html/search/variables_5.html create mode 100644 docs/generated-html/search/variables_5.js create mode 100644 docs/generated-html/search/variables_6.html create mode 100644 docs/generated-html/search/variables_6.js create mode 100644 docs/generated-html/search/variables_7.html create mode 100644 docs/generated-html/search/variables_7.js create mode 100644 docs/generated-html/search/variables_8.html create mode 100644 docs/generated-html/search/variables_8.js create mode 100644 docs/generated-html/search/variables_9.html create mode 100644 docs/generated-html/search/variables_9.js create mode 100644 docs/generated-html/search/variables_a.html create mode 100644 docs/generated-html/search/variables_a.js create mode 100644 docs/generated-html/search/variables_b.html create mode 100644 docs/generated-html/search/variables_b.js create mode 100644 docs/generated-html/search/variables_c.html create mode 100644 docs/generated-html/search/variables_c.js create mode 100644 docs/generated-html/search/variables_d.html create mode 100644 docs/generated-html/search/variables_d.js create mode 100644 docs/generated-html/search/variables_e.html create mode 100644 docs/generated-html/search/variables_e.js create mode 100644 docs/generated-html/search/variables_f.html create mode 100644 docs/generated-html/search/variables_f.js create mode 100644 docs/generated-html/sgemm__traits_8h.html create mode 100644 docs/generated-html/sgemm__traits_8h_source.html create mode 100644 docs/generated-html/shape_8h.html create mode 100644 
docs/generated-html/shape_8h_source.html create mode 100644 docs/generated-html/splitbar.png create mode 100644 docs/generated-html/structcutlass_1_1AlignedStruct.html create mode 100644 docs/generated-html/structcutlass_1_1AlignedStruct.png create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html create mode 100644 
docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html create mode 100644 docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html create mode 100644 docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html create mode 100644 docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html create mode 100644 docs/generated-html/structcutlass_1_1Convert.html create mode 100644 docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html create mode 100644 docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html create mode 100644 docs/generated-html/structcutlass_1_1Coord-members.html create mode 100644 docs/generated-html/structcutlass_1_1Coord.html create mode 100644 docs/generated-html/structcutlass_1_1Copy-members.html create mode 100644 docs/generated-html/structcutlass_1_1Copy.html create mode 100644 docs/generated-html/structcutlass_1_1Extent-members.html create mode 100644 docs/generated-html/structcutlass_1_1Extent.html create mode 100644 docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Fragment-members.html create mode 100644 docs/generated-html/structcutlass_1_1Fragment.html create mode 100644 docs/generated-html/structcutlass_1_1Fragment.png create mode 100644 docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentConstIterator.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentIterator-members.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentIterator.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentLoad.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStore.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html create mode 100644 
docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStream.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1GemmOperand-members.html create mode 100644 docs/generated-html/structcutlass_1_1GemmOperand.html create mode 100644 docs/generated-html/structcutlass_1_1Identity-members.html create mode 100644 docs/generated-html/structcutlass_1_1Identity.html create mode 100644 docs/generated-html/structcutlass_1_1IteratorAdvance-members.html create mode 100644 docs/generated-html/structcutlass_1_1IteratorAdvance.html create mode 100644 docs/generated-html/structcutlass_1_1IteratorFragment-members.html create mode 100644 docs/generated-html/structcutlass_1_1IteratorFragment.html create mode 100644 docs/generated-html/structcutlass_1_1Load-members.html create mode 100644 docs/generated-html/structcutlass_1_1Load.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1MatrixLayout-members.html create mode 100644 docs/generated-html/structcutlass_1_1MatrixLayout.html create mode 100644 docs/generated-html/structcutlass_1_1MemorySpace-members.html create mode 100644 docs/generated-html/structcutlass_1_1MemorySpace.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateTileAdapter.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateVector-members.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateVector.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html create mode 100644 docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html create mode 100644 docs/generated-html/structcutlass_1_1ReshapeTile-members.html create mode 100644 docs/generated-html/structcutlass_1_1ReshapeTile.html create mode 100644 docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Shape-members.html create mode 100644 docs/generated-html/structcutlass_1_1Shape.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeAdd-members.html create mode 100644 
docs/generated-html/structcutlass_1_1ShapeAdd.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeCount-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeCount.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeDiv-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeDiv.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMax-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMax.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMin-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMin.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMul-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeMul.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeScale-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeScale.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeStrides-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeStrides.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeSub-members.html create mode 100644 docs/generated-html/structcutlass_1_1ShapeSub.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType-members.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1Store-members.html create mode 100644 docs/generated-html/structcutlass_1_1Store.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase.png create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1TileLoadIterator-members.html create mode 100644 
docs/generated-html/structcutlass_1_1TileLoadIterator.html create mode 100644 docs/generated-html/structcutlass_1_1TileLoadIterator.png create mode 100644 docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator.html create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator.png create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1TileTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraits.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsStandard.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html create mode 100644 
docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1TiledThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html create mode 100644 docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1Vectorize-members.html create mode 100644 docs/generated-html/structcutlass_1_1Vectorize.html create mode 100644 docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1divide__assert-members.html create mode 100644 docs/generated-html/structcutlass_1_1divide__assert.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1SharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits-members.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html create mode 100644 
docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html create mode 100644 docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html create mode 100644 docs/generated-html/structcutlass_1_1is__pow2-members.html create mode 100644 docs/generated-html/structcutlass_1_1is__pow2.html create mode 100644 docs/generated-html/structcutlass_1_1is__pow2.png create mode 100644 docs/generated-html/structcutlass_1_1log2__down-members.html create mode 100644 docs/generated-html/structcutlass_1_1log2__down.html create mode 100644 docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1log2__up-members.html create mode 100644 
docs/generated-html/structcutlass_1_1log2__up.html create mode 100644 docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1conditional-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1conditional.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1default__delete.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1enable__if.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1greater-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1greater.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__fundamental-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1is__integral-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__same.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1is__same.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__void.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__void.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1less-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1less.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1plus-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1plus.html create mode 100644 
docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__const.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structcutlass_1_1sqrt__est-members.html create mode 100644 docs/generated-html/structcutlass_1_1sqrt__est.html create mode 100644 docs/generated-html/structnv__std_1_1aligned__chunk.html create mode 100644 docs/generated-html/structnv__std_1_1aligned__storage-members.html create mode 100644 docs/generated-html/structnv__std_1_1aligned__storage.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of.png create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html create mode 
100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html create mode 100644 
docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1bool__constant-members.html create mode 100644 docs/generated-html/structnv__std_1_1bool__constant.html create mode 100644 docs/generated-html/structnv__std_1_1bool__constant.png create mode 100644 docs/generated-html/structnv__std_1_1conditional-members.html create mode 100644 docs/generated-html/structnv__std_1_1conditional.html create mode 100644 docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1default__delete-members.html create mode 100644 docs/generated-html/structnv__std_1_1default__delete.html create mode 100644 docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html create mode 100644 docs/generated-html/structnv__std_1_1enable__if-members.html create mode 100644 docs/generated-html/structnv__std_1_1enable__if.html create mode 
100644 docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1greater-members.html create mode 100644 docs/generated-html/structnv__std_1_1greater.html create mode 100644 docs/generated-html/structnv__std_1_1integral__constant-members.html create mode 100644 docs/generated-html/structnv__std_1_1integral__constant.html create mode 100644 docs/generated-html/structnv__std_1_1integral__constant.png create mode 100644 docs/generated-html/structnv__std_1_1is__arithmetic-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__arithmetic.html create mode 100644 docs/generated-html/structnv__std_1_1is__arithmetic.png create mode 100644 docs/generated-html/structnv__std_1_1is__base__of-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__base__of.html create mode 100644 docs/generated-html/structnv__std_1_1is__base__of.png create mode 100644 docs/generated-html/structnv__std_1_1is__base__of__helper-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__base__of__helper.html create mode 100644 docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html create mode 100644 docs/generated-html/structnv__std_1_1is__floating__point-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__floating__point.html create mode 100644 docs/generated-html/structnv__std_1_1is__floating__point.png create mode 100644 docs/generated-html/structnv__std_1_1is__fundamental-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__fundamental.html create mode 100644 docs/generated-html/structnv__std_1_1is__fundamental.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral.html create mode 100644 
docs/generated-html/structnv__std_1_1is__integral.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html create mode 100644 
docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png create mode 100644 
docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__pointer-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer.png create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper.png create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__same-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__same.html create mode 100644 docs/generated-html/structnv__std_1_1is__same.png create mode 100644 docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__trivially__copyable.html create mode 100644 docs/generated-html/structnv__std_1_1is__trivially__copyable.png create mode 100644 docs/generated-html/structnv__std_1_1is__void-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__void.html create mode 100644 
docs/generated-html/structnv__std_1_1is__void.png create mode 100644 docs/generated-html/structnv__std_1_1is__volatile-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__volatile.html create mode 100644 docs/generated-html/structnv__std_1_1is__volatile.png create mode 100644 docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png create mode 100644 docs/generated-html/structnv__std_1_1less-members.html create mode 100644 docs/generated-html/structnv__std_1_1less.html create mode 100644 docs/generated-html/structnv__std_1_1nullptr__t.html create mode 100644 docs/generated-html/structnv__std_1_1plus-members.html create mode 100644 docs/generated-html/structnv__std_1_1plus.html create mode 100644 docs/generated-html/structnv__std_1_1remove__const-members.html create mode 100644 docs/generated-html/structnv__std_1_1remove__const.html create mode 100644 docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html create mode 100644 docs/generated-html/structnv__std_1_1remove__cv-members.html create mode 100644 docs/generated-html/structnv__std_1_1remove__cv.html create mode 100644 docs/generated-html/structnv__std_1_1remove__volatile-members.html create mode 100644 docs/generated-html/structnv__std_1_1remove__volatile.html create mode 100644 docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html create mode 100644 docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html create mode 100644 docs/generated-html/sync_off.png create mode 100644 docs/generated-html/sync_on.png create mode 100644 docs/generated-html/tab_a.png create mode 100644 
docs/generated-html/tab_b.png create mode 100644 docs/generated-html/tab_h.png create mode 100644 docs/generated-html/tab_s.png create mode 100644 docs/generated-html/tabs.css create mode 100644 docs/generated-html/tensor__ref_8h.html create mode 100644 docs/generated-html/tensor__ref_8h_source.html create mode 100644 docs/generated-html/tensor__view_8h.html create mode 100644 docs/generated-html/tensor__view_8h_source.html create mode 100644 docs/generated-html/thread__multiply__add_8h.html create mode 100644 docs/generated-html/thread__multiply__add_8h_source.html create mode 100644 docs/generated-html/tile_8h.html create mode 100644 docs/generated-html/tile_8h_source.html create mode 100644 docs/generated-html/tile__iterator_8h.html create mode 100644 docs/generated-html/tile__iterator_8h_source.html create mode 100644 docs/generated-html/tile__traits__standard_8h.html create mode 100644 docs/generated-html/tile__traits__standard_8h_source.html create mode 100644 docs/generated-html/unioncutlass_1_1Vector-members.html create mode 100644 docs/generated-html/unioncutlass_1_1Vector.html create mode 100644 docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html create mode 100644 docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html create mode 100644 
docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html create mode 100644 docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html create mode 100644 docs/generated-html/vector_8h.html create mode 100644 docs/generated-html/vector_8h_source.html create mode 100644 docs/generated-html/wmma__gemm__epilogue__traits_8h.html create mode 100644 docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html create mode 100644 docs/generated-html/wmma__gemm__global__tile_8h.html create mode 100644 docs/generated-html/wmma__gemm__global__tile_8h_source.html create mode 100644 docs/generated-html/wmma__gemm__multiply__add_8h.html create mode 100644 docs/generated-html/wmma__gemm__multiply__add_8h_source.html create mode 100644 docs/generated-html/wmma__gemm__shared__tile_8h.html create mode 100644 docs/generated-html/wmma__gemm__shared__tile_8h_source.html create mode 100644 docs/generated-html/wmma__gemm__traits_8h.html create mode 100644 docs/generated-html/wmma__gemm__traits_8h_source.html create mode 100644 docs/generated-html/wmma__matrix_8h.html create mode 100644 docs/generated-html/wmma__matrix_8h_source.html delete mode 100644 media/cutlass-performance-plot.png delete mode 100644 media/fig-09-complete-hierarchy.png create mode 100644 media/images/cutlass-performance-plot.png create mode 100644 media/images/gemm-hierarchy-with-epilogue-no-labels.png create mode 100644 media/images/gemm-hierarchy-with-epilogue.png create mode 100644 tools/CMakeLists.txt create mode 100644 tools/external/googletest/.gitignore create mode 100644 tools/external/googletest/.travis.yml create mode 100644 tools/external/googletest/BUILD.bazel create mode 100644 tools/external/googletest/CMakeLists.txt create mode 100644 tools/external/googletest/README.md create mode 100644 tools/external/googletest/WORKSPACE create mode 100644 tools/external/googletest/appveyor.yml create mode 100755 
tools/external/googletest/ci/build-linux-bazel.sh create mode 100755 tools/external/googletest/ci/env-linux.sh create mode 100755 tools/external/googletest/ci/env-osx.sh create mode 100755 tools/external/googletest/ci/install-linux.sh create mode 100755 tools/external/googletest/ci/install-osx.sh create mode 100755 tools/external/googletest/ci/log-config.sh create mode 100755 tools/external/googletest/ci/travis.sh create mode 100644 tools/external/googletest/googlemock/CHANGES create mode 100644 tools/external/googletest/googlemock/CMakeLists.txt create mode 100644 tools/external/googletest/googlemock/CONTRIBUTORS create mode 100644 tools/external/googletest/googlemock/LICENSE create mode 100644 tools/external/googletest/googlemock/Makefile.am create mode 100644 tools/external/googletest/googlemock/README.md create mode 100644 tools/external/googletest/googlemock/build-aux/.keep create mode 100644 tools/external/googletest/googlemock/cmake/gmock.pc.in create mode 100644 tools/external/googletest/googlemock/cmake/gmock_main.pc.in create mode 100644 tools/external/googletest/googlemock/configure.ac create mode 100644 tools/external/googletest/googlemock/docs/CheatSheet.md create mode 100644 tools/external/googletest/googlemock/docs/CookBook.md create mode 100644 tools/external/googletest/googlemock/docs/DesignDoc.md create mode 100644 tools/external/googletest/googlemock/docs/DevGuide.md create mode 100644 tools/external/googletest/googlemock/docs/Documentation.md create mode 100644 tools/external/googletest/googlemock/docs/ForDummies.md create mode 100644 tools/external/googletest/googlemock/docs/FrequentlyAskedQuestions.md create mode 100644 tools/external/googletest/googlemock/docs/KnownIssues.md create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-actions.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-cardinalities.h create mode 100644 
tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-actions.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-matchers.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-generated-nice-strict.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-matchers.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-more-actions.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-more-matchers.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock-spec-builders.h create mode 100644 tools/external/googletest/googlemock/include/gmock/gmock.h create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/custom/gmock-generated-actions.h create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/custom/gmock-generated-actions.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/custom/gmock-matchers.h create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/custom/gmock-port.h create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/gmock-generated-internal-utils.h create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/gmock-generated-internal-utils.h.pump create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/gmock-internal-utils.h 
create mode 100644 tools/external/googletest/googlemock/include/gmock/internal/gmock-port.h create mode 100644 tools/external/googletest/googlemock/make/Makefile create mode 100644 tools/external/googletest/googlemock/msvc/2005/gmock.sln create mode 100644 tools/external/googletest/googlemock/msvc/2005/gmock.vcproj create mode 100644 tools/external/googletest/googlemock/msvc/2005/gmock_config.vsprops create mode 100644 tools/external/googletest/googlemock/msvc/2005/gmock_main.vcproj create mode 100644 tools/external/googletest/googlemock/msvc/2005/gmock_test.vcproj create mode 100644 tools/external/googletest/googlemock/msvc/2010/gmock.sln create mode 100644 tools/external/googletest/googlemock/msvc/2010/gmock.vcxproj create mode 100644 tools/external/googletest/googlemock/msvc/2010/gmock_config.props create mode 100644 tools/external/googletest/googlemock/msvc/2010/gmock_main.vcxproj create mode 100644 tools/external/googletest/googlemock/msvc/2010/gmock_test.vcxproj create mode 100644 tools/external/googletest/googlemock/msvc/2015/gmock.sln create mode 100644 tools/external/googletest/googlemock/msvc/2015/gmock.vcxproj create mode 100644 tools/external/googletest/googlemock/msvc/2015/gmock_config.props create mode 100644 tools/external/googletest/googlemock/msvc/2015/gmock_main.vcxproj create mode 100644 tools/external/googletest/googlemock/msvc/2015/gmock_test.vcxproj create mode 100755 tools/external/googletest/googlemock/scripts/fuse_gmock_files.py create mode 100644 tools/external/googletest/googlemock/scripts/generator/LICENSE create mode 100644 tools/external/googletest/googlemock/scripts/generator/README create mode 100644 tools/external/googletest/googlemock/scripts/generator/README.cppclean create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/__init__.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/ast.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/gmock_class.py 
create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/gmock_class_test.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/keywords.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/tokenize.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/cpp/utils.py create mode 100755 tools/external/googletest/googlemock/scripts/generator/gmock_gen.py create mode 100755 tools/external/googletest/googlemock/scripts/gmock-config.in create mode 100755 tools/external/googletest/googlemock/scripts/gmock_doctor.py create mode 100755 tools/external/googletest/googlemock/scripts/upload.py create mode 100755 tools/external/googletest/googlemock/scripts/upload_gmock.py create mode 100644 tools/external/googletest/googlemock/src/gmock-all.cc create mode 100644 tools/external/googletest/googlemock/src/gmock-cardinalities.cc create mode 100644 tools/external/googletest/googlemock/src/gmock-internal-utils.cc create mode 100644 tools/external/googletest/googlemock/src/gmock-matchers.cc create mode 100644 tools/external/googletest/googlemock/src/gmock-spec-builders.cc create mode 100644 tools/external/googletest/googlemock/src/gmock.cc create mode 100644 tools/external/googletest/googlemock/src/gmock_main.cc create mode 100644 tools/external/googletest/googlemock/test/BUILD.bazel create mode 100644 tools/external/googletest/googlemock/test/gmock-actions_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-cardinalities_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-generated-actions_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-generated-function-mockers_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-generated-internal-utils_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-generated-matchers_test.cc create mode 100644 
tools/external/googletest/googlemock/test/gmock-internal-utils_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-matchers_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-more-actions_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-nice-strict_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-port_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock-spec-builders_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_all_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_ex_test.cc create mode 100755 tools/external/googletest/googlemock/test/gmock_leak_test.py create mode 100644 tools/external/googletest/googlemock/test/gmock_leak_test_.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_link2_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_link_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_link_test.h create mode 100755 tools/external/googletest/googlemock/test/gmock_output_test.py create mode 100644 tools/external/googletest/googlemock/test/gmock_output_test_.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_output_test_golden.txt create mode 100644 tools/external/googletest/googlemock/test/gmock_stress_test.cc create mode 100644 tools/external/googletest/googlemock/test/gmock_test.cc create mode 100755 tools/external/googletest/googlemock/test/gmock_test_utils.py create mode 100644 tools/external/googletest/googletest/CHANGES create mode 100644 tools/external/googletest/googletest/CMakeLists.txt create mode 100644 tools/external/googletest/googletest/CONTRIBUTORS create mode 100644 tools/external/googletest/googletest/LICENSE create mode 100644 tools/external/googletest/googletest/Makefile.am create mode 100644 tools/external/googletest/googletest/README.md create mode 100644 
tools/external/googletest/googletest/build-aux/.keep create mode 100644 tools/external/googletest/googletest/cmake/gtest.pc.in create mode 100644 tools/external/googletest/googletest/cmake/gtest_main.pc.in create mode 100644 tools/external/googletest/googletest/cmake/internal_utils.cmake create mode 100644 tools/external/googletest/googletest/codegear/gtest.cbproj create mode 100644 tools/external/googletest/googletest/codegear/gtest.groupproj create mode 100644 tools/external/googletest/googletest/codegear/gtest_all.cc create mode 100644 tools/external/googletest/googletest/codegear/gtest_link.cc create mode 100644 tools/external/googletest/googletest/codegear/gtest_main.cbproj create mode 100644 tools/external/googletest/googletest/codegear/gtest_unittest.cbproj create mode 100644 tools/external/googletest/googletest/configure.ac create mode 100644 tools/external/googletest/googletest/docs/AdvancedGuide.md create mode 100644 tools/external/googletest/googletest/docs/DevGuide.md create mode 100644 tools/external/googletest/googletest/docs/Documentation.md create mode 100644 tools/external/googletest/googletest/docs/FAQ.md create mode 100644 tools/external/googletest/googletest/docs/Pkgconfig.md create mode 100644 tools/external/googletest/googletest/docs/Primer.md create mode 100644 tools/external/googletest/googletest/docs/PumpManual.md create mode 100644 tools/external/googletest/googletest/docs/Samples.md create mode 100644 tools/external/googletest/googletest/docs/XcodeGuide.md create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-death-test.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-message.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-param-test.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-param-test.h.pump create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-printers.h create mode 100644 
tools/external/googletest/googletest/include/gtest/gtest-spi.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-test-part.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest-typed-test.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest_pred_impl.h create mode 100644 tools/external/googletest/googletest/include/gtest/gtest_prod.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/custom/gtest-port.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/custom/gtest-printers.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/custom/gtest.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-death-test-internal.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-filepath.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-internal.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-linked_ptr.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-param-util-generated.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-param-util-generated.h.pump create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-param-util.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-port-arch.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-port.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-string.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-tuple.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-tuple.h.pump create mode 100644 
tools/external/googletest/googletest/include/gtest/internal/gtest-type-util.h create mode 100644 tools/external/googletest/googletest/include/gtest/internal/gtest-type-util.h.pump create mode 100644 tools/external/googletest/googletest/m4/acx_pthread.m4 create mode 100644 tools/external/googletest/googletest/m4/gtest.m4 create mode 100644 tools/external/googletest/googletest/make/Makefile create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest-md.sln create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest-md.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest-md.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest.sln create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_main-md.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_main-md.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_main.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_main.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_prod_test-md.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_prod_test-md.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_prod_test.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_prod_test.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_unittest-md.vcxproj create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_unittest-md.vcxproj.filters create mode 100644 tools/external/googletest/googletest/msvc/2010/gtest_unittest.vcxproj create mode 100644 
tools/external/googletest/googletest/msvc/2010/gtest_unittest.vcxproj.filters create mode 100644 tools/external/googletest/googletest/samples/prime_tables.h create mode 100644 tools/external/googletest/googletest/samples/sample1.cc create mode 100644 tools/external/googletest/googletest/samples/sample1.h create mode 100644 tools/external/googletest/googletest/samples/sample10_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample1_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample2.cc create mode 100644 tools/external/googletest/googletest/samples/sample2.h create mode 100644 tools/external/googletest/googletest/samples/sample2_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample3-inl.h create mode 100644 tools/external/googletest/googletest/samples/sample3_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample4.cc create mode 100644 tools/external/googletest/googletest/samples/sample4.h create mode 100644 tools/external/googletest/googletest/samples/sample4_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample5_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample6_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample7_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample8_unittest.cc create mode 100644 tools/external/googletest/googletest/samples/sample9_unittest.cc create mode 100644 tools/external/googletest/googletest/scripts/common.py create mode 100755 tools/external/googletest/googletest/scripts/fuse_gtest_files.py create mode 100755 tools/external/googletest/googletest/scripts/gen_gtest_pred_impl.py create mode 100755 tools/external/googletest/googletest/scripts/gtest-config.in create mode 100755 tools/external/googletest/googletest/scripts/pump.py create mode 100755 tools/external/googletest/googletest/scripts/release_docs.py 
create mode 100644 tools/external/googletest/googletest/scripts/test/Makefile create mode 100755 tools/external/googletest/googletest/scripts/upload.py create mode 100755 tools/external/googletest/googletest/scripts/upload_gtest.py create mode 100644 tools/external/googletest/googletest/src/gtest-all.cc create mode 100644 tools/external/googletest/googletest/src/gtest-death-test.cc create mode 100644 tools/external/googletest/googletest/src/gtest-filepath.cc create mode 100644 tools/external/googletest/googletest/src/gtest-internal-inl.h create mode 100644 tools/external/googletest/googletest/src/gtest-port.cc create mode 100644 tools/external/googletest/googletest/src/gtest-printers.cc create mode 100644 tools/external/googletest/googletest/src/gtest-test-part.cc create mode 100644 tools/external/googletest/googletest/src/gtest-typed-test.cc create mode 100644 tools/external/googletest/googletest/src/gtest.cc create mode 100644 tools/external/googletest/googletest/src/gtest_main.cc create mode 100644 tools/external/googletest/googletest/test/BUILD.bazel create mode 100644 tools/external/googletest/googletest/test/gtest-death-test_ex_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-death-test_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-filepath_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-linked_ptr_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-listener_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-message_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-options_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-param-test2_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-param-test_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-param-test_test.h create mode 100644 
tools/external/googletest/googletest/test/gtest-port_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-printers_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-test-part_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-tuple_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-typed-test2_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-typed-test_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest-typed-test_test.h create mode 100644 tools/external/googletest/googletest/test/gtest-unittest-api_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest_all_test.cc create mode 100755 tools/external/googletest/googletest/test/gtest_break_on_failure_unittest.py create mode 100644 tools/external/googletest/googletest/test/gtest_break_on_failure_unittest_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_catch_exceptions_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_catch_exceptions_test_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_color_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_color_test_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_env_var_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_env_var_test_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_environment_test.cc create mode 100755 tools/external/googletest/googletest/test/gtest_filter_unittest.py create mode 100644 tools/external/googletest/googletest/test/gtest_filter_unittest_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_help_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_help_test_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_list_tests_unittest.py create mode 100644 
tools/external/googletest/googletest/test/gtest_list_tests_unittest_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_main_unittest.cc create mode 100644 tools/external/googletest/googletest/test/gtest_no_test_unittest.cc create mode 100755 tools/external/googletest/googletest/test/gtest_output_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_output_test_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_output_test_golden_lin.txt create mode 100644 tools/external/googletest/googletest/test/gtest_pred_impl_unittest.cc create mode 100644 tools/external/googletest/googletest/test/gtest_premature_exit_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest_prod_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest_repeat_test.cc create mode 100755 tools/external/googletest/googletest/test/gtest_shuffle_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_shuffle_test_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_sole_header_test.cc create mode 100644 tools/external/googletest/googletest/test/gtest_stress_test.cc create mode 100755 tools/external/googletest/googletest/test/gtest_test_utils.py create mode 100644 tools/external/googletest/googletest/test/gtest_throw_on_failure_ex_test.cc create mode 100755 tools/external/googletest/googletest/test/gtest_throw_on_failure_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_throw_on_failure_test_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_uninitialized_test.py create mode 100644 tools/external/googletest/googletest/test/gtest_uninitialized_test_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_unittest.cc create mode 100644 tools/external/googletest/googletest/test/gtest_xml_outfile1_test_.cc create mode 100644 tools/external/googletest/googletest/test/gtest_xml_outfile2_test_.cc create mode 100755 
tools/external/googletest/googletest/test/gtest_xml_outfiles_test.py create mode 100755 tools/external/googletest/googletest/test/gtest_xml_output_unittest.py create mode 100644 tools/external/googletest/googletest/test/gtest_xml_output_unittest_.cc create mode 100755 tools/external/googletest/googletest/test/gtest_xml_test_utils.py create mode 100644 tools/external/googletest/googletest/test/production.cc create mode 100644 tools/external/googletest/googletest/test/production.h create mode 100644 tools/external/googletest/googletest/xcode/Config/DebugProject.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Config/FrameworkTarget.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Config/General.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Config/ReleaseProject.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Config/StaticLibraryTarget.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Config/TestTarget.xcconfig create mode 100644 tools/external/googletest/googletest/xcode/Resources/Info.plist create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/Info.plist create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/WidgetFramework.xcodeproj/project.pbxproj create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/runtests.sh create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/widget.cc create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/widget.h create mode 100644 tools/external/googletest/googletest/xcode/Samples/FrameworkSample/widget_test.cc create mode 100644 tools/external/googletest/googletest/xcode/Scripts/runtests.sh create mode 100755 tools/external/googletest/googletest/xcode/Scripts/versiongenerate.py create mode 100644 tools/external/googletest/googletest/xcode/gtest.xcodeproj/project.pbxproj 
create mode 100644 tools/nvrtc/CMakeLists.txt create mode 100644 tools/nvrtc/cutlass/nvrtc/environment.h create mode 100644 tools/nvrtc/stdlib/assert.h create mode 100644 tools/nvrtc/stdlib/stdint.h create mode 100644 tools/test/CMakeLists.txt create mode 100644 tools/test/perf/CMakeLists.txt create mode 100644 tools/test/perf/cutlass_perf_test.cpp create mode 100644 tools/test/perf/gemm/cublas_dispatch.h create mode 100644 tools/test/perf/gemm/cutlass_dispatch.h create mode 100644 tools/test/perf/gemm/dgemm.cu create mode 100644 tools/test/perf/gemm/gemm_perf_testbed.h create mode 100644 tools/test/perf/gemm/gemm_profiler.h create mode 100644 tools/test/perf/gemm/hgemm.cu create mode 100644 tools/test/perf/gemm/igemm.cu create mode 100644 tools/test/perf/gemm/sgemm.cu create mode 100644 tools/test/perf/gemm/wmma_gemm.cu create mode 100644 tools/test/perf/performance_result.h create mode 100644 tools/test/perf/testbench_options.h create mode 100644 tools/test/perf/testbench_output.h create mode 100644 tools/test/unit/CMakeLists.txt create mode 100644 tools/test/unit/core/layout_verification.cu create mode 100644 tools/test/unit/core/layout_verification.h create mode 100644 tools/test/unit/core/predicate_vector.cu create mode 100644 tools/test/unit/core/tile_iterator.cu create mode 100644 tools/test/unit/cutlass_unit_test.cpp create mode 100644 tools/test/unit/cutlass_unit_test.h create mode 100644 tools/test/unit/gemm/dgemm.cu create mode 100644 tools/test/unit/gemm/gemm.h create mode 100644 tools/test/unit/gemm/gemm_nvrtc.cu create mode 100644 tools/test/unit/gemm/gemm_nvrtc.h create mode 100644 tools/test/unit/gemm/gemm_shared_mem_layouts.cu create mode 100644 tools/test/unit/gemm/gemm_testbed.h create mode 100644 tools/test/unit/gemm/hgemm_128x128x8.cu create mode 100644 tools/test/unit/gemm/hgemm_128x32x8.cu create mode 100644 tools/test/unit/gemm/hgemm_128x64x8.cu create mode 100644 tools/test/unit/gemm/igemm_128x128x32.cu create mode 100644 
tools/test/unit/gemm/igemm_128x128x32_float.cu create mode 100644 tools/test/unit/gemm/igemm_128x128x32_int8.cu create mode 100644 tools/test/unit/gemm/igemm_128x32x32.cu create mode 100644 tools/test/unit/gemm/igemm_128x64x32.cu create mode 100644 tools/test/unit/gemm/sgemm_128x128x8.cu create mode 100644 tools/test/unit/gemm/sgemm_128x32x8.cu create mode 100644 tools/test/unit/gemm/sgemm_128x64x8.cu create mode 100644 tools/test/unit/gemm/sgemm_64x128x8.cu create mode 100644 tools/test/unit/gemm/sgemm_64x32x8.cu create mode 100644 tools/test/unit/gemm/sgemm_64x64x8.cu create mode 100644 tools/test/unit/gemm/wmma_gemm.cu create mode 100644 tools/test/unit/util/host_tensor.cu create mode 100644 tools/util/command_line.h create mode 100644 tools/util/device_memory.h create mode 100644 tools/util/exceptions.h create mode 100644 tools/util/half.h create mode 100644 tools/util/host_tensor.h create mode 100644 tools/util/host_tensor_view.h create mode 100644 tools/util/tensor_view_io.h create mode 100644 tools/util/type_traits.h diff --git a/CMake/bin2hex.cmake b/CMake/bin2hex.cmake new file mode 100644 index 0000000000..603c9a6cfe --- /dev/null +++ b/CMake/bin2hex.cmake @@ -0,0 +1,26 @@ +# A small utility function which generates a C-header from an input file +function(FILE_TO_C_STRING FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED) + FILE(READ "${FILENAME}" HEX_INPUT HEX) + if (${ZERO_TERMINATED}) + string(APPEND HEX_INPUT "00") + endif() + + string(REGEX REPLACE "(....)" "\\1\n" HEX_OUTPUT ${HEX_INPUT}) + string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," HEX_OUTPUT ${HEX_OUTPUT}) + + set(HEX_OUTPUT "static char const ${VARIABLE_NAME}[] = {\n ${HEX_OUTPUT}\n};\n") + + set(${OUTPUT_STRING} "${HEX_OUTPUT}" PARENT_SCOPE) +endfunction() + +message("Create header file for ${FILE_IN}") +message("Create header file for ${FILE_OUT}") +file_to_c_string(${FILE_IN} ${VARIABLE_NAME} OUTPUT_STRING ZERO_TERMINATED) + +set(RESULT "#pragma once\n") +string(APPEND RESULT 
"namespace cutlass {\n") +string(APPEND RESULT "namespace nvrtc {\n") +string(APPEND RESULT "${OUTPUT_STRING}") +string(APPEND RESULT "} // namespace nvrtc\n") +string(APPEND RESULT "} // namespace cutlass\n") +file(WRITE "${FILE_OUT}" "${RESULT}") diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..5a53fae555 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,182 @@ +# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted +# provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, this list of +# conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used +# to endorse or promote products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +cmake_minimum_required(VERSION 3.3.0) + +set(CUTLASS_LANGUAGES CXX) + +# CMake 3.9.0 has native support for CUDA without the need of the CUDA package. Use it! +if(WIN32 AND NOT ${CMAKE_VERSION} VERSION_LESS "3.9.0") + list(APPEND CUTLASS_LANGUAGES CUDA) + set(CUTLASS_NATIVE_CUDA TRUE) + + macro(cutlass_add_executable) + add_executable(${ARGN}) + endmacro() +else() + # FindCUDA fails to detect VS 2017 due to a changed directory format of the toolkits. + # For this configuration we need CMake >= 3.9.0 to use the native CUDA support. + if (WIN32 AND MSVC_VERSION GREATER 1800) + message(FATAL_ERROR "Please upgrade CMake to version >= 3.9.0 to support Visual Studio 2017 or higher") + endif() + + # Fall back to the FindCUDA version to create an executable with CUDA files + macro(cutlass_add_executable) + cuda_add_executable(${ARGN}) + endmacro() +endif() + +project(CUTLASS ${CUTLASS_LANGUAGES}) + +# check if the configuration is supported +if( NOT CMAKE_SIZEOF_VOID_P EQUAL 8 ) + message(FATAL_ERROR "CUTLASS requires a 64-bit compiler!") +endif() + +find_package(CUDA) +find_package(Doxygen QUIET) + +# By default we want to build in Release mode to ensure that we're getting best performance +if (NOT (CMAKE_BUILD_TYPE OR CONFIGURATION_TYPES)) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build level" FORCE) + # We do support Debug or Release builds + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") +endif() + +if(WIN32) + # On Windows we link against the shared (DLL) runtime. Change gtest settings to match this. 
+ set(gtest_force_shared_crt ON CACHE BOOL "Use shared (DLL) run-time lib even when Google Test is built as static lib" FORCE) +endif() + +if (WIN32) + # Enable more warnings and treat as errors + string(APPEND NVCC_FLAGS " -Xcompiler /W3 -Xcompiler /WX") + + # Disable excess x86 floating point precision that can lead to results being labeled incorrectly + string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict") + + # Verbose option + if (${CUTLASS_NVCC_VERBOSE}) + string(APPEND NVCC_FLAGS " -v") + endif() +endif(WIN32) + +# Configure CUDA options +set(CUTLASS_NVCC_ARCHS "50;60;61;70" CACHE STRING "The SM architectures to build code for.") +set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.") + +foreach(ARCH ${CUTLASS_NVCC_ARCHS}) + string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}") +endforeach() + + +if (CUTLASS_NVCC_KEEP) + string(APPEND NVCC_FLAGS " -keep") +endif() + +if (WIN32 AND CUTLASS_NATIVE_CUDA) + string(APPEND NVCC_FLAGS_RELEASE " -lineinfo") +else() + string(APPEND NVCC_FLAGS " -lineinfo") +endif() + +if (UNIX) + string(APPEND NVCC_FLAGS " -Xcompiler -Wconversion") +endif() + +string(APPEND NVCC_FLAGS_DEBUG " -g") +string(APPEND NVCC_FLAGS_RELEASE " -O3") + +# define NDEBUG for release mode to disable assertions +string(APPEND NVCC_FLAGS_RELEASE " -DNDEBUG") + +if (CUTLASS_NATIVE_CUDA) + set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS}") + set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}") + set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_RELEASE}") +else() + set(CUDA_NVCC_FLAGS ${NVCC_FLAGS}) + set(CUDA_NVCC_FLAGS_DEBUG ${NVCC_FLAGS_DEBUG}) + set(CUDA_NVCC_FLAGS_RELEASE ${NVCC_FLAGS_RELEASE}) +endif() + +# +# The following items should eventually be pushed into cutlass/CMakeLists.txt +# + +# GLOB for CUTLASS header files. Should we use a static list instead? 
+file(GLOB CUTLASS_GEMM RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/gemm/*.h) +file(GLOB CUTLASS_UTIL RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/util/*.h) +file(GLOB CUTLASS_DEVICE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/device/*.h) +file(GLOB CUTLASS_CORE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/*.h) + +source_group("cutlass\\gemm" FILES ${CUTLASS_GEMM}) +source_group("cutlass\\util" FILES ${CUTLASS_UTIL}) +source_group("cutlass\\device" FILES ${CUTLASS_DEVICE}) +source_group("cutlass" FILES ${CUTLASS_CORE}) + +add_library(CUTLASS INTERFACE) +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") +target_sources(CUTLASS INTERFACE + ${CUTLASS_GEMM} + ${CUTLASS_UTIL} + ${CUTLASS_DEVICE} + ${CUTLASS_CORE} +) + +target_include_directories(CUTLASS INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) + +# Create a custom target to ensure that the CUTLASS sources are visible in an IDE +add_custom_target(cutlass_ide SOURCES + ${CUTLASS_GEMM} + ${CUTLASS_UTIL} + ${CUTLASS_DEVICE} + ${CUTLASS_CORE} +) +# Doxygen is available. Generate documentation +if (DOXYGEN_FOUND) + # DOT is available. Enable graph generation in the documentation + if (DOXYGEN_DOT_EXECUTABLE) + set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.") + else() + set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE) + endif() + + if (CUTLASS_ENABLE_DOXYGEN_DOT) + set(HAVE_DOT "YES") + else() + set(HAVE_DOT "NO") + endif() + + # Add custom target for Doxygen. 
+ add_custom_target(cutlass_docs ${CMAKE_COMMAND} -E env + "DOT_PATH=${DOXYGEN_DOT_EXECUTABLE}" + "HAVE_DOT=${HAVE_DOT}" + ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + VERBATIM + ) +endif() + + +#add_subdirectory(examples/gemm) +add_subdirectory(tools) diff --git a/Doxyfile b/Doxyfile index 15650b2076..51cec529b3 100644 --- a/Doxyfile +++ b/Doxyfile @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = doxygen +OUTPUT_DIRECTORY = docs # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -218,7 +218,8 @@ TAB_SIZE = 4 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines. -ALIASES = +#ALIASES += "concept{1}=@ingroup \1\n@par Implemented concepts:\n@ref \1" +ALIASES += "concept{1}=@ingroup \1" # This tag can be used to specify a number of word-keyword mappings (TCL only). # A mapping has the form "name=value". For example adding "class=itcl::class" @@ -396,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0 # normally produced when WARNINGS is set to YES. # The default value is: NO. -EXTRACT_ALL = NO +EXTRACT_ALL = YES # If the EXTRACT_PRIVATE tag is set to YES all private members of a class will # be included in the documentation. @@ -733,7 +734,7 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = cutlass cutlass/gemm cutlass/util +INPUT = cutlass # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -759,7 +760,7 @@ FILE_PATTERNS = # be searched for input files as well. # The default value is: NO. 
-RECURSIVE = NO +RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a @@ -2032,7 +2033,7 @@ HIDE_UNDOC_RELATIONS = YES # set to NO # The default value is: NO. -HAVE_DOT = NO +HAVE_DOT = $(HAVE_DOT) # The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed # to run in parallel. When set to 0 doxygen will base this on the number of @@ -2204,7 +2205,7 @@ INTERACTIVE_SVG = NO # found. If left blank, it is assumed the dot tool can be found in the path. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_PATH = +DOT_PATH = $(DOT_PATH) # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the \dotfile diff --git a/README.md b/README.md index a13c9a4c8c..05a0d3a33a 100644 --- a/README.md +++ b/README.md @@ -1,106 +1,213 @@ -![ALT](/media/fig-09-complete-hierarchy.png "Complete CUDA GEMM decomposition") +![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") -# Introduction +# CUTLASS 1.0 -CUTLASS is a collection of CUDA C++ template abstractions for implementing -high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. -It incorporates strategies for hierarchical decomposition and data movement similar -to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into +CUTLASS 1.0 is a collection of CUDA C++ template abstractions for implementing +high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. +It incorporates strategies for hierarchical decomposition and data movement similar +to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into reusable, modular software components abstracted by C++ template classes. 
These -thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized -and tuned via custom tiling sizes, data types, and other algorithmic policy. The -resulting flexibility simplifies their use as building blocks within custom kernels +thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized +and tuned via custom tiling sizes, data types, and other algorithmic policy. The +resulting flexibility simplifies their use as building blocks within custom kernels and applications. To support a wide variety of applications, CUTLASS provides extensive support for -mixed-precision computations, providing specialized data-movement and -multiply-accumulate abstractions for 8-bit integer, half-precision floating -point (FP16), single-precision floating point (FP32), and double-precision floating +mixed-precision computations, providing specialized data-movement and +multiply-accumulate abstractions for 8-bit integer, half-precision floating +point (FP16), single-precision floating point (FP32), and double-precision floating point (FP64) types. Furthermore, CUTLASS demonstrates CUDA's WMMA API for targeting -the programmable, high-throughput _Tensor Cores_ provided by NVIDIA's Volta architecture +the programmable, high-throughput _Tensor Cores_ provided by NVIDIA's Volta architecture and beyond. -For more exposition, see our Parallel Forall blog post [CUTLASS: Fast Linear Algebra -in CUDA C++](https://devblogs.nvidia.com/parallelforall/cutlass-linear-algebra-cuda). +CUTLASS 1.0 has changed substantially from our preview release described in +the [CUTLASS Parallel For All](https://devblogs.nvidia.com/parallelforall/cutlass-linear-algebra-cuda) +post. We have decomposed the structure of the GEMM computation into deeper, structured +primitives for loading data, computing predicate masks, streaming data at each level of +the GEMM hierarchy, and updating the output matrix. 
+ +CUTLASS 1.0 is described in the [Doxygen documentation](https://github.com/NVIDIA/cutlass/docs) +and our talk at the GPU Technology Conference 2018 (login required). # Performance -

+

-CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels, -they exhibit performance comparable to cuBLAS for scalar GEMM -computations. The above figure shows CUTLASS performance relative to cuBLAS -for large matrix dimensions (M=10240, N=K=4096) running on an NVIDIA Tesla V100 GPU -when compiled with CUDA 9.0. +CUTLASS primitives are very efficient. When used to construct device-wide GEMM kernels, +they exhibit performance comparable to cuBLAS for scalar GEMM +computations. The above figure shows CUTLASS performance relative to cuBLAS +for large matrix dimensions (M=10240, N=K=4096) running on an NVIDIA Titan V GPU +when compiled with CUDA 9.2. -# Project Structure +# Compatibility -CUTLASS is arranged as a header-only library with several example test programs -that demonstrate instantiating a GEMM task within a CUDA kernel. Comments inline -with the source explain the individual components. +CUTLASS requires CUDA 9 and performs best with [CUDA 9.2 Toolkit](https://developer.nvidia.com/cuda-toolkit) or later. -The repository is organized in the following arrangement. +|**Operating System** | **Compiler** | +|-----------------|----------| +| Windows 10 | Microsoft Visual Studio 2015| +| | Microsoft Visual Studio 2017| +| Ubuntu 14.04 | GCC 4.8.2 | +| Ubuntu 16.04 | GCC 5.4.0 | - cutlass/ Root of header-only source library for matrix multiply - gemm/ Implementation of GEMM __device__ code and supporting components - util/ Utility components for CUDA device-side CUDA development -A test program is provided to illustrate the use of CUTLASS. This is implemented -in the following directory. +CUTLASS runs successfully on the following NVIDIA GPUs, and it is expected to be efficient on +any Maxwell-, Pascal-, or Volta-architecture NVIDIA GPU. 
- cutlass_test Root of test programs depicting CUTLASS kernels - util/ Utilities - gemm.cu Simple example calling CUTLASS and CUBLAS GEMM kernels - Makefile Build script for test programs +|**GPU**| +|---| +|NVIDIA GeForce 1080| +|NVIDIA TitanXP| +|NVIDIA Tesla P100| +|NVIDIA Tesla V100| +|NVIDIA TitanV| -# Makefile usage +# Building CUTLASS -There are different sample targets for different GEMM data types and -transposititions. Be sure to specify your target architecture. +CUTLASS is a header-only template library and does not need to be built to be used by other +projects. However, we distribute extensive unit tests and utility programs to demonstrate +CUTLASS. These instructions are for building those test programs. - make sm=<60|61|70> \ - [transpose=] [verbose=<0|1>] [keep=<0|1>] +CUTLASS's unit tests depend on Google Test which exists as a git submodule. You can fetch +submodules as follows. + +``` +$ git submodule update --init --recursive +``` +CUTLASS can be built with CMake starting with version 3.10. By default CUTLASS will build kernels +for CUDA architecture versions 5.0, 6.0, 6.1 and 7.0. To reduce compile time you can specify +the architectures to build CUTLASS for by changing the CMake configuration setting +`CUTLASS_NVCC_ARCHS`. -# Program usage +Create a build directory within the CUTLASS project, then run CMake once. - Program usage: +``` +$ mkdir build && cd build +$ cmake .. +``` - gemm_ - [--help] - [--schmoo=<#schmoo-samples> || --m= --n= --k=] - [--i=] - [--device=] - [--alpha= --beta=] +Compile the CUTLASS project by running Make. Include the -j argument to compile sources in +parallel and speed up the build process. +``` +$ make -j12 +... +$ +``` -# Open Source License +Verify CUTLASS has been built correctly by running the unit tests from the build/ directory. + +``` +$ ./tools/test/unit/cutlass_unit_test +... +... +... +[----------] Global test environment tear-down +[==========] 481 tests from 24 test cases ran. 
(5954 ms total) +[ PASSED ] 481 tests. +``` + +All tests should pass, though the exact number of tests may vary over time. + + +# Project Structure + +CUTLASS is arranged as a header-only library with several example test programs +that demonstrate instantiating a GEMM task within a CUDA kernel. The Doxygen documentation +provides a complete list of files, classes, and template concepts defined in the CUTLASS +project. A brief summary is described below. -CUTLASS is released by NVIDIA Corporation under the "New BSD" open-source license: +The CUTLASS library is defined in the cutlass/ directory and consists of CUDA C++ template +classes and other definitions for implementing efficient GPU GEMM kernels. A set of core +classes and templates define basic primitives that are then applied to compute GEMM via +templates in the cutlass/gemm directory. ``` -Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the NVIDIA CORPORATION nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +cutlass/ + gemm/ + util/ + ``` + +Several tools and test programs are also distributed with the CUTLASS library. They are +contained in the following directories. + +``` +tools/ + test/ + unit/ + core/ + gemm/ + perf/ + util/ + +``` + +The `test/unit/` directory consists of unit tests implemented with Google Test that demonstrate +basic usage of Core API components and complete tests of the CUTLASS GEMM computations. + +# Performance Profiling + +The `test/perf/` directory contains a command-line utility for launching each of the GEMM kernels. +Its usage is shown below. + +Program usage: + +``` + cutlass_perf_test [options] + + --help + --append= If true, appends output to existing CSV file. If false, overwrites. + --alpha= Value for alpha to be used in GEMM experiments + --beta= Value for beta to be used in GEMM experiments + --output= Writes summary of profiling to specified .csv file + --iterations= maximum number of iterations to execute when profiling + --m=[:max height[:step]] Height of GEMM problem (number of rows of C). May specify a range with optional step size. + --n=[:max width[:step]] Width of GEMM problem (number of columns of C). May specify a range with optional step size. + --k=[:max depth[:step]] Size of inner dimension of A and B. May specify a range with optional step size. 
+ --kernels=<{s|d|h|i|wmma}gemm_{nn,nt,tn,tt}> Select GEMM datatype and layout to use for tests + --peak= If true, only reports peak performance per kernel after profiling specified problem space. + --seed= Random seed used by the random number generator in initializing input matrices. + --tags= Inserts leading columns in output table and uniform values for each column. Useful for generating pivot tables. + + + Example usage: + + # Runs one problem size for all kernels + $ ./tools/test/perf/cutlass_perf_test --m=10240 --n=1024 --k=1024 + + # Varies GEMM K dimension for SGEMM and IGEMM with column-major multiplicands + $ ./tools/test/perf/cutlass_perf_test --m=10240 --n=4096 --k=1024:8192:128 --kernels=sgemm_nn,igemm_nn +``` + +# About + +CUTLASS is released by NVIDIA Corporation as Open Source software under the +3-clause "New" BSD license. + + +# Copyright + +Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of + conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + to endorse or promote products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/clang-format.sh b/clang-format.sh new file mode 100755 index 0000000000..b2570d9147 --- /dev/null +++ b/clang-format.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e + +function formatFiles { + for f in `find "$1" -type f -name "*.$2"` ; do + COMMAND="clang-format -i $f" + echo $COMMAND + $COMMAND + done +} + +formatFiles "cutlass" "h" +formatFiles "tools/test" "h" +formatFiles "tools/test" "cpp" +formatFiles "tools/util" "h" + diff --git a/common.mk b/common.mk deleted file mode 100644 index 672ea5b244..0000000000 --- a/common.mk +++ /dev/null @@ -1,181 +0,0 @@ -#/****************************************************************************** -# * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -# * -# * Redistribution and use in source and binary forms, with or without -# * modification, are permitted provided that the following conditions are met: -# * * Redistributions of source code must retain the above copyright -# * notice, this list of conditions and the following disclaimer. -# * * Redistributions in binary form must reproduce the above copyright -# * notice, this list of conditions and the following disclaimer in the -# * documentation and/or other materials provided with the distribution. -# * * Neither the name of the NVIDIA CORPORATION nor the -# * names of its contributors may be used to endorse or promote products -# * derived from this software without specific prior written permission. 
-# * -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# * -#******************************************************************************/ - - -#------------------------------------------------------------------------------- -# Commandline Options -#------------------------------------------------------------------------------- - -# sm= Compute-capability to compile for, e.g., "sm=200,300,350" (SM2.0 by default). 
- -COMMA := , -ifdef sm - SM_ARCH := $(subst $(COMMA),-,$(sm)) -else - $(error Please specify SM architecture makefile argument: "sm=XX") -endif - -ifeq (70, $(findstring 70, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_70,code=\"sm_70,compute_70\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_70 -endif -ifeq (62, $(findstring 62, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_62,code=\"sm_62,compute_62\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_62 -endif -ifeq (61, $(findstring 61, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_61,code=\"sm_61,compute_61\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_61 -endif -ifeq (60, $(findstring 60, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_60 -endif -ifeq (52, $(findstring 52, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_52,code=\"sm_52,compute_52\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_52 -endif -ifeq (37, $(findstring 37, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_37,code=\"sm_37,compute_37\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_37 -endif -ifeq (35, $(findstring 35, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_35,code=\"sm_35,compute_35\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_35 -endif -ifeq (30, $(findstring 30, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_30 -endif -ifeq (21, $(findstring 21, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_21 -endif -ifeq (20, $(findstring 20, $(SM_ARCH))) - SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\" - CLANG_SM_TARGETS += --cuda-gpu-arch=sm_20 -endif - - -# [verbose=<0|1>] Verbose toolchain output from nvcc option -ifeq ($(verbose), 1) - NVCCFLAGS += -v - CLANG_CFLAGS += -v -endif - - -# [keep=<0|1>] Keep intermediate compilation artifacts option -ifeq ($(keep), 1) - NVCCFLAGS += -keep - CLANG_CFLAGS += 
--save-temps -endif - - -# [debug=<0|1>] Generate debug mode code -ifeq ($(debug), 1) - NVCCFLAGS += -G - CLANG_CFLAGS += --cuda-noopt-device-debug -endif - - -#------------------------------------------------------------------------------- -# Compiler and compilation platform -#------------------------------------------------------------------------------- - -BASE_DIR := $(dir $(lastword $(MAKEFILE_LIST))) - -NVCC := "$(shell which nvcc)" -ifdef nvccver - NVCC_VERSION := $(nvccver) -else - NVCC_VERSION := $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//')) -endif - -# Detect OS -OSUPPER := $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) - -# Default flags: verbose kernel properties (regs, smem, cmem, etc.); runtimes for compilation phases -NVCCFLAGS += -O3 -Xptxas -v -CLANG_CFLAGS += -O3 -Xcuda-ptxas -v -ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) - # For MSVC - - # Enable more warnings and treat as errors - NVCCFLAGS += -Xcompiler /W3 -Xcompiler /WX - - # Disable excess x86 floating point precision that can lead to results being labeled incorrectly - NVCCFLAGS += -Xcompiler /fp:strict - - # Compiler - CC := cl - - # Multithreaded runtime - NVCCFLAGS += -Xcompiler /MT - - CUDART_CYG := "$(shell dirname $(NVCC))/../lib/x64/cudart.lib" - CUDART := "$(shell cygpath -w $(CUDART_CYG))" - -else - # For g++ - - # Disable excess x86 floating point precision that can lead to results being labeled incorrectly - #NVCCFLAGS += -Xcompiler -ffloat-store - - # Compiler - CC := g++ - - CUDART := "$(shell dirname $(NVCC))/../lib64/libcudart_static.a" - -endif - -# compiler=clang Enables compilation with clang. - -ifeq ($(compiler), clang) - # NVCC_VERSION is used as the proxy for the CUDA version. - BIN_SUFFIX := sm$(SM_ARCH)_clang_cuda_$(NVCC_VERSION) - # Clangs needs few extra flags to point it to CUDA SDK - # and link the binaries with CUDA runtime. 
- CUDA_BASE=$(realpath $(join $(dir $(shell which nvcc)), ..)) - CLANG_CFLAGS += --cuda-path=$(CUDA_BASE) - LIBINC += -L$(CUDA_BASE)/lib64 -Wl,-rpath=$(CUDA_BASE)/lib64 - LIBS += -lcudart - - # Replace NVCC and its options with clang++. - NVCC = clang++ - NVCCFLAGS = $(CLANG_CFLAGS) - SM_TARGETS = $(CLANG_SM_TARGETS) -else - # Suffix to append to each binary - BIN_SUFFIX := sm$(SM_ARCH)_nvcc_$(NVCC_VERSION) -endif - - -#------------------------------------------------------------------------------- -# Function for computing dependency Lists -#------------------------------------------------------------------------------- - -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) diff --git a/cutlass/convert.h b/cutlass/convert.h new file mode 100644 index 0000000000..933d68a82a --- /dev/null +++ b/cutlass/convert.h @@ -0,0 +1,102 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! + \file + \brief Defines conversion operations among Fragments of different base type. +*/ +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Convert {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Convert, Fragment > { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + /// Ctor. + CUTLASS_DEVICE Convert() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + for (int i = 0; i < kScalars_; ++i) { + dst[i] = static_cast(src[i + offset]); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Copy { + /// The input fragment. 
+ typedef Fragment_ InputFragment; + /// The output fragment. + typedef Fragment_ OutputFragment; + + /// Ctor. + CUTLASS_DEVICE Copy() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) { + if (sizeof(typename Fragment_::Element) == 8) { + uint64_t const* src_ptr = reinterpret_cast(&src[offset]); + uint64_t* dst_ptr = reinterpret_cast(&dst[0]); + for (int i = 0; i < sizeof(Fragment_) / 8; ++i) { + dst_ptr[i] = src_ptr[i]; + } + } else { + uint32_t const* src_ptr = reinterpret_cast(&src[offset]); + uint32_t* dst_ptr = reinterpret_cast(&dst[0]); + for (int i = 0; i < sizeof(Fragment_) / 4; ++i) { + dst_ptr[i] = src_ptr[i]; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/coord.h b/cutlass/coord.h new file mode 100644 index 0000000000..431c9bf1a0 --- /dev/null +++ b/cutlass/coord.h @@ -0,0 +1,287 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief A Coord is a coordinate of arbitrary rank into a tensor or matrix +*/ + +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Describes identity elements +struct Identity { + /// Enumeration describing identity elements. Value assignments are significant. + /// Feel free to add or multiply by these, respectively.
+ enum Kind { Additive = 0, Multiplicative = 1 }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically-sized array specifying Coords within a tensor +template +struct Coord { + // + // Type and constant definitions + // + + static int const N = N_; + + // + // Data members + // + + /// Indices + int idx[N]; + + // + // Methods + // + + /// Default ctor initializes uniformly + CUTLASS_HOST_DEVICE + Coord(int value = 0) { + for (int i = 0; i < N; ++i) { + idx[i] = value; + } + } + + /// Constructs from an array of integers + CUTLASS_HOST_DEVICE + Coord(int _idx[]) { + for (int i = 0; i < N; ++i) { + idx[i] = _idx[i]; + } + } + + /// Element-wise addition + CUTLASS_HOST_DEVICE + Coord operator+(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] + b.idx[i]; + } + return c; + } + + /// Element-wise subtraction + CUTLASS_HOST_DEVICE + Coord operator-(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] - b.idx[i]; + } + return c; + } + + /// Element-wise multiplication + CUTLASS_HOST_DEVICE + Coord operator*(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] * b.idx[i]; + } + return c; + } + + /// Element-wise division + CUTLASS_HOST_DEVICE + Coord operator/(Coord const& b) const { + Coord c; + for (int i = 0; i < N; ++i) { + c.idx[i] = idx[i] / b.idx[i]; + } + return c; + } + + /// In-place addition + CUTLASS_HOST_DEVICE + Coord& operator+=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] += b.idx[i]; + } + return *this; + } + + /// In-place subtraction + CUTLASS_HOST_DEVICE + Coord& operator-=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] -= b.idx[i]; + } + return *this; + } + + /// In-place multiplication + CUTLASS_HOST_DEVICE + Coord& operator*=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] *= b.idx[i]; + } + return *this; + } + + /// In-place division + 
CUTLASS_HOST_DEVICE + Coord& operator/=(Coord const& b) { + for (int i = 0; i < N; ++i) { + idx[i] /= b.idx[i]; + } + return *this; + } + + /// Member access operator + CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; } + + /// Member access operator + CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; } + + /// Computes the dot product of two Coord instances + template + CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const { + for (int i = 0; i < N; ++i) { + sum += idx[i] * b.idx[i]; + } + return sum; + } + + /// Computes the dot product of two Coord instances + template + CUTLASS_HOST_DEVICE T dot(Coord const& b) const { + T sum = T(0); + for (int i = 0; i < N; ++i) { + sum += idx[i] * b.idx[i]; + } + return sum; + } + + /// Gets the index of a given Coord element + template + CUTLASS_HOST_DEVICE int& at() { + return idx[Dim]; + } + + /// Access via index; may limit unrolling potential + CUTLASS_HOST_DEVICE + int& at(int dim) { return idx[dim]; } + + /// Gets the index of a given Coord element + template + CUTLASS_HOST_DEVICE int const& at() const { + return idx[Dim]; + } + + /// Access via index; may limit unrolling potential + CUTLASS_HOST_DEVICE + int const& at(int dim) const { return idx[dim]; } + + /// Determines if two Coord<> objects are equal + CUTLASS_HOST_DEVICE + bool operator==(Coord const& b) const { + bool equal = true; + for (int i = 0; equal && i < N; ++i) { + equal = (idx[i] == b.idx[i]); + } + return equal; + } + + /// Not equal + CUTLASS_HOST_DEVICE + bool operator!=(Coord const& b) const { return !(*this == b); } + + /// Clamps a coordinate to a range specified by maximum and minimum values + CUTLASS_HOST_DEVICE + Coord& clamp(Coord const& max, Coord const& min = Coord()) { + for (int i = 0; i < N; ++i) { + idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]); + } + return *this; + } + + /// Returns the product of all elements + CUTLASS_HOST_DEVICE + int count() const { + int product = 
idx[0]; + for (int i = 1; i < N; ++i) { + product *= idx[i]; + } + return product; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to make a 1-element coordinate +CUTLASS_HOST_DEVICE +Coord<1> make_Coord(int _0) { + int values[1] = {_0}; + return Coord<1>(values); +} + +/// Helper to make a 2-element coordinate +CUTLASS_HOST_DEVICE +Coord<2> make_Coord(int _0, int _1) { + int values[2] = {_0, _1}; + return Coord<2>(values); +} + +/// Helper to make a 3-element coordinate +CUTLASS_HOST_DEVICE +Coord<3> make_Coord(int _0, int _1, int _2) { + int values[3] = {_0, _1, _2}; + return Coord<3>(values); +} + +/// Helper to make a 4-element coordinate +CUTLASS_HOST_DEVICE +Coord<4> make_Coord(int _0, int _1, int _2, int _3) { + int values[4] = {_0, _1, _2, _3}; + return Coord<4>(values); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Getter +CUTLASS_HOST_DEVICE +Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); } + +/// Getter +CUTLASS_HOST_DEVICE +Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/core_io.h b/cutlass/core_io.h new file mode 100644 index 0000000000..cceea4c06d --- /dev/null +++ b/cutlass/core_io.h @@ -0,0 +1,44 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +#pragma once + +/*! \file + \brief Helpers for printing cutlass/core objects +*/ + +#pragma once + +#include +#include + +#include + +template +std::ostream& operator<<(std::ostream& out, cutlass::Coord const& coord) { + for (int i = 0; i < Rank; ++i) { + out << (i ?
", " : "") << coord.idx[i]; + } + return out; +} diff --git a/cutlass/cutlass.h b/cutlass/cutlass.h new file mode 100644 index 0000000000..1e428b166b --- /dev/null +++ b/cutlass/cutlass.h @@ -0,0 +1,73 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +/*! 
\file + \brief Basic include for CUTLASS macros +*/ + +#pragma once + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#define CUTLASS_MAJOR 1 +#define CUTLASS_MINOR 0 +#define CUTLASS_PATCH 0 +#define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH) + +#ifdef __NVCC__ +#define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__ +#define CUTLASS_DEVICE __forceinline__ __device__ +#elif defined(__CUDACC_RTC__) +#define CUTLASS_HOST_DEVICE __forceinline__ __device__ +#define CUTLASS_DEVICE __forceinline__ __device__ +#else +#define CUTLASS_HOST_DEVICE +// CUTLASS_DEVICE is an error if not compiling device code +#endif + +// CUTLASS_PRAGMA_UNROLL / CUTLASS_PRAGMA_NO_UNROLL expand to unroll pragmas when compiling device code, and to nothing otherwise +#if defined(__CUDA_ARCH__) +#if defined(_MSC_VER) +#define CUTLASS_PRAGMA_UNROLL __pragma("unroll") +#define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1") +#else +#define CUTLASS_PRAGMA_UNROLL _Pragma("unroll") +#define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1") +#endif +#else +#define CUTLASS_PRAGMA_UNROLL +#define CUTLASS_PRAGMA_NO_UNROLL +#endif + +#define CUTLASS_ASSERT(x) assert(x) + +namespace cutlass { + +/// NVIDIA GPU Warp size +static const int kWarpSize = 32; + +} // namespace cutlass + +//////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/cutlass/fragment.h b/cutlass/fragment.h new file mode 100644 index 0000000000..53fa380c20 --- /dev/null +++ b/cutlass/fragment.h @@ -0,0 +1,278 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines Fragment, a statically-sized array for storing parts of matrices within a + thread's registers.
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup fragment_concept Fragment Concept +@{ + +\ref fragment_concept is a statically sized array for storing parts of tiles held by individual CUDA +threads. + +@par \ref fragment_concept + Types satisfying \ref fragment_concept define the following members + - Element - type of each access held within the fragment + - kElements - number of elements stored by the fragment + - clear() - overwrites the fragment storage with zeros + - Element & operator[](int i) - by-reference access of the ith element + - Element const & operator[](int i) const - const by-reference access of the ith element +@} +*/ + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup fragment_iterator_concept Fragment Iterator Concept +@{ + +\ref fragment_iterator_concept provides structured access to the elements within a fragment with an +optional bitcast to the desired access type + +@par \ref fragment_iterator_concept + Types satisfying \ref fragment_iterator_concept define the following members + - AccessType& operator[](int i) - provides access to the ith element of the fragment + - AccessType& at(int d, int h, int w, int c) - applies \ref layout_concept to fragment and +provides access to element at (d, h, w, c) + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct StorageType { + typedef uint64_t Type; +}; +template <> +struct StorageType<4> { + typedef uint32_t Type; +}; +template <> +struct StorageType<2> { + typedef uint16_t Type; +}; +template <> +struct StorageType<1> { + typedef uint8_t Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref 
fragment_concept +* @concept{fragment_concept} +*/ +template +struct Fragment : public AlignedStruct { + /// Make sure the alignment makes sense wrt the size of elements. + static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small"); + /// Alignment must be a power of two + static_assert(is_pow2::value, "Alignment must be a power of two"); + + /// This class. + typedef Fragment This_; + /// The element. + typedef Element_ Element; + /// The number of elements. + static int const kElements = kElements_; + + /// Clear a fragment. + CUTLASS_DEVICE void clear() { + // Avoid element-wise access for sub 32b element type + if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) { + uint64_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) { + ptr[i] = uint64_t(0); + } + } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) { + uint32_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) { + ptr[i] = uint32_t(0); + } + } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) { + uint16_t* ptr = reinterpret_cast(storage); + for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) { + ptr[i] = uint16_t(0); + } + } else { + for (int i = 0; i < kElements; ++i) { + storage[i] = 0; + } + } + } + + /// The accessor. + CUTLASS_DEVICE Element& operator[](int i) { + assert(i < kElements_); + return reinterpret_cast(storage)[i]; + } + + /// The accessor. + CUTLASS_DEVICE Element const& operator[](int i) const { + assert(i < kElements_); + return reinterpret_cast(storage)[i]; + } + + private: + /// Storage type to use for Elements + typedef typename StorageType::Type StorageType; + + /// Number of elements in the storage + static int const kStorageCount = + (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType); + /// The storage. 
+ StorageType storage[kStorageCount]; + + /// Ensure that there's enough storage for all elements + static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment"); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief A template defining \ref fragment_iterator_concept +* @concept{fragment_iterator_concept} +*/ +template +struct FragmentIterator { + /// This class. + typedef FragmentIterator This_; + /// The fragment. + typedef Fragment_ Fragment; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The access type. + typedef AccessType_ AccessType; + + /// The element. + typedef typename Fragment::Element Element; + /// The number of elements per access. + static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element)); + /// The shape of the fragment. + typedef typename ShapeMul >::Shape FragmentShape; + /// The linear strides for iterations. + typedef typename ShapeStrides::Shape Strides; + + /// Ctor. + template + CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0) + : pointer(reinterpret_cast(&fragment[offset])) { + static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); + } + + /// The accessor. + CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType const& operator[](int i) const { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// The accessor. + CUTLASS_DEVICE AccessType& operator[](int i) { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// Is the iterator valid?
+ CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + + /// The pointer. + Element* pointer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Read-only counterpart of FragmentIterator: same addressing, but only const accessors. +template +struct FragmentConstIterator { + /// This class. + /// NOTE(review): names FragmentIterator, not FragmentConstIterator -- confirm this is intended. + typedef FragmentIterator This_; + /// The fragment. + typedef Fragment_ Fragment; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The access type. + typedef AccessType_ AccessType; + + /// The element. + typedef typename Fragment::Element Element; + /// The number of elements per access. + static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element)); + /// The shape of the fragment. + typedef typename ShapeMul >::Shape FragmentShape; + /// The linear strides for iterations. + typedef typename ShapeStrides::Shape IterationsStrides; + + /// Ctor. Points the iterator at &fragment[offset]; the source fragment must provide at least + /// Fragment::kElements elements (checked at compile time). + template + CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0) + : pointer(reinterpret_cast(&fragment[offset])) { + static_assert(OtherFragment_::kElements >= Fragment::kElements, ""); + } + /// Create from non-constant FragmentIterator; views the same elements through rhs_.pointer, + /// which is the only data member FragmentIterator declares. + CUTLASS_DEVICE FragmentConstIterator( + FragmentIterator const& rhs_) + : pointer(reinterpret_cast(rhs_.pointer)) {} + + /// The accessor: the element offset for coordinate (d, h, w, c) comes from + /// ComputeOffsetFromStrides::get, and the element at that offset is viewed as an AccessType. + CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const { + int const imm = ComputeOffsetFromStrides::get(d, h, w, c); + return reinterpret_cast(pointer[imm]); + } + + /// The accessor: the i-th access, i.e. the element at index i * kElementsPerAccess. + CUTLASS_DEVICE AccessType const& operator[](int i) const { + return reinterpret_cast(pointer[i * kElementsPerAccess]); + } + + /// Is the iterator valid? Always true; the coordinates are not inspected. + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; } + + /// The pointer. 
+ Element const* pointer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/fragment_load_store.h b/cutlass/fragment_load_store.h new file mode 100644 index 0000000000..a7d272e9e3 --- /dev/null +++ b/cutlass/fragment_load_store.h @@ -0,0 +1,135 @@ +/*************************************************************************************************** + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines accessors for loading and storing fragments to memory efficiently. +*/ +#pragma once + +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Primary template: empty; the specializations below provide load(). +template +struct FragmentLoad {}; + +/// Specialization whose access type is FragmentElement_ itself; loading is delegated to that type. +template +struct FragmentLoad { + /// The output type. + typedef FragmentElement_ AccessType; + + /// The load function: forwards to value.load(&pointer[offset], kStride). + static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) { + value.load(&pointer[offset], kStride); + } +}; + +/// Specialization whose access type is Vectorize::Type; loading is delegated to Load::load. +template +struct FragmentLoad { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function: forwards to Load::load(value, pointer, offset). + static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) { + Load::load(value, pointer, offset); + } +}; + +/// Primary template: empty; the specializations below provide store(). +template +struct FragmentStore {}; + +/// Specialization whose access type is FragmentElement_ itself; storing is delegated to that type. +template +struct FragmentStore { + /// The input type. + typedef FragmentElement_ AccessType; + + /// The store function: forwards to value.store(&pointer[offset], kStride). + static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) { + value.store(&pointer[offset], kStride); + } +}; + +/// Specialization whose access type is Vectorize::Type. +template +struct FragmentStore { + /// The input type. + typedef typename Vectorize::Type AccessType; + + /// The store function. 
+ static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) { + Store::store(value, pointer, offset); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} /// namespace cutlass diff --git a/cutlass/fragment_multiply_add.h b/cutlass/fragment_multiply_add.h new file mode 100644 index 0000000000..2d31e793bf --- /dev/null +++ b/cutlass/fragment_multiply_add.h @@ -0,0 +1,131 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines multiply-add operations on fragments within a thread. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Element-wise scaling of a fragment by a scalar, with an optional fragment addend. +template +struct FragmentMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The type for A. + typedef Scalar_ ScalarA; + /// The type for B. + typedef Scalar_ ScalarB; + /// The type for C and D. + typedef Scalar_ ScalarC; + + /// Ctor. + CUTLASS_DEVICE FragmentMultiplyAdd() {} + + /// Multiply : d = a*b, applied to each of the Fragment_::kElements elements. + template + CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) { + for (int j = 0; j < Fragment_::kElements; ++j) { + d[j] = a * b[j]; + } + } + + /// Multiply-add : d = a*b + c, applied to each of the Fragment_::kElements elements. + template + CUTLASS_DEVICE void multiply_add(Scalar_ a, + Fragment_ const& b, + Fragment_ const& c, + Fragment_& d) { + for (int j = 0; j < Fragment_::kElements; ++j) { + d[j] = a * b[j] + c[j]; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +/// Specialization whose scalar types are half. +template <> +struct FragmentMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The type for A. 
+ typedef half ScalarA; + /// The type for B. + typedef half ScalarB; + /// The type for C and D. + typedef half ScalarC; + + /// Ctor. + CUTLASS_DEVICE FragmentMultiplyAdd() {} + + /// Multiply : d = a*b, computed two elements at a time with __hmul2. + /// The body is compiled only when __CUDACC__ is defined and __CUDA_ARCH__ >= 530; otherwise the + /// function is empty and d is left unmodified. + template + CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The input. + __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + // The output. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + // Assemble a half2 from a. + __half2 const a_half2 = __half2half2(a); + + for (int i = 0; i < Fragment_::kElements / 2; ++i) { + d_half2[i] = __hmul2(a_half2, b_half2[i]); + } + + // An odd element count leaves one element outside the half2 pairs; compute it on its own. + if (Fragment_::kElements % 2) { + d[Fragment_::kElements - 1] = __hmul(a, b[Fragment_::kElements - 1]); + } +#endif + } + + /// Multiply-add : d = a*b + c, computed two elements at a time with __hfma2. + /// The body is compiled only when __CUDACC__ is defined and __CUDA_ARCH__ >= 530; otherwise the + /// function is empty and d is left unmodified. + template + CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The inputs. + __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]); + // The output. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + // Assemble a half2 from a. + __half2 const a_half2 = __half2half2(a); + + for (int i = 0; i < Fragment_::kElements / 2; ++i) { + d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]); + } + + // An odd element count leaves one element outside the half2 pairs; compute it on its own. + if (Fragment_::kElements % 2) { + d[Fragment_::kElements - 1] = + __hfma(a, b[Fragment_::kElements - 1], c[Fragment_::kElements - 1]); + } +#endif + } +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/block_loader.h b/cutlass/gemm/block_loader.h deleted file mode 100644 index 0c5b3faa91..0000000000 --- a/cutlass/gemm/block_loader.h +++ /dev/null @@ -1,162 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * block-wide tile-loading abstractions - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * load_algorithm - ******************************************************************************/ - -/** - * \brief Enumeration of matrix loading algorithms - */ -struct load_algorithm -{ - /// \brief Enumerants. See corresponding tag types. - enum kind_t - { - CongruousCopy = 0, - CrosswiseCopy = 1, - }; - - /** - * \brief Generic tag - */ - template - struct any_tag : nv_std::integral_constant {}; - - /** - * \brief Copy from a global matrix that is row-major in relation - * to the local row-major tile - */ - typedef any_tag contiguous_tag_t; - - /** - * \brief Copy from a global matrix that is column-major in relation - * to the local row-major tile - */ - typedef any_tag crosswise_tag_t; - -}; - - -/****************************************************************************** - * block_loader - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared - * block-wide tile is a row-major (L-major) tiling of dp_vector_t items, which are - * themselves column-major (K-major) vectors of value_t. Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * NB: This generic class is not directly constructible. Architecture- and - * algorithm-specific template specializations will provide the API - * functionality prescribed here. 
- * - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles, ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions - typename dp_vector_t, ///< Dot-product vector type along the K-axis - load_algorithm::kind_t LoadAlgorithm> ///< Algorithm for loading a shared tile of KxL matrix data -struct block_loader -{ - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - block_loader( - value_t *d_matrix, ///< Pointer to input matrix - int matrix_values_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_values_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_values_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 block_begin_item_coords, ///< Thread block's starting value_t coordinates (l, k) within the input matrix - int block_end_item_k); ///< Thread block's ending coordinate (k) within the input matrix (one-past) - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - void request(); - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - void next(); - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: 
To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride _BlockDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. - */ - template - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][_BlockDpVectorsL]); - -}; - - -} // namespace gemm -} // namespace cutlass - - -/****************************************************************************** - * Tail-include specializations that adhere to the block_loader API - ******************************************************************************/ - -#include "block_loader_crosswise.h" -#include "block_loader_congruous_dp1.h" -#include "block_loader_congruous_idp4.h" diff --git a/cutlass/gemm/block_loader_congruous_dp1.h b/cutlass/gemm/block_loader_congruous_dp1.h deleted file mode 100644 index 80dca26dd7..0000000000 --- a/cutlass/gemm/block_loader_congruous_dp1.h +++ /dev/null @@ -1,406 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CongruousCopy + dp1 specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CongruousCopy + dp1 specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared block-wide tile is - * a row-major (L-major) tiling of singleton "dp1" dp_vector_t items, where - * dp_vector_t == value_t. Its dimensions are: - * K = BlockDpVectorsK - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is also L-major. This constitutes a CongruousCopy - * between the L-major global tile and the L-major shared tile. 
- * - * NB: Because they are "dp1" singletons, the K-major orientation of - * dp_vector_t in shared memory is irrelevant, and the L-major global and - * shared tile layouts are perfectly congruous. As a result, we can increase - * the granularity of data transfer via vectorization of loads and stores - * without any intermediate {dis|re}assembly. - * - * NB: Consecutive threads within a block are mapped in L-major - * fashion across a first-set of LDG-vectors of dp_vector_t (value_t) within - * their global tile. Successive sets of LDG-vectors are then strip-mined - * as necessary down the K-axis. These discontiguous LDG-vectors comprise the - * thread's "slice" of the block-wide tile. - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader< - BlockThreads, - BlockDpVectorsK, - BlockDpVectorsL, - value_t, - LeadingDimAlignBytes, - AllowRaggedTiles, - value_t, ///< Dot-product vector type along the K-axis (dp1 specialization) - load_algorithm::CongruousCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CongruousCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Dot-product vector type along the K-axis - typedef value_t dp_vector_t; - - enum - { - /// Number of value_t in a dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - 
BlockDpVectors = BlockDpVectorsK * BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - }; - - /// Data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadDpVectors or BlockDpVectorsL - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadDpVectors, BlockDpVectorsL), - LeadingDimAlignBytes> - ldg_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockDpVectorsK, - - - - /// Number of ldg_vector_t within each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, (BlockLdgVectorsL / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads, - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = __NV_STD_MIN(BlockLdgVectorsL, StripmineLdgVectors), - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = LdgVectorDpVectors, - }; - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - - //------------------------------------------------------------------------- - // Assert assumptions - 
//------------------------------------------------------------------------- - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the input matrix (one-past) - int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within "whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadLdgVectorsK][ThreadLdgVectorsL]; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear 
memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l / LdgVectorItems; - matrix_ldgvec_stride_k = matrix_items_stride_k / LdgVectorItems, - matrix_ldgvec_stride_l = matrix_items_stride_l; - - // ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - block_thread_ldgvec_coords = make_int2( - threadIdx.x % BlockLdgVectorsL, // l-coordinate - threadIdx.x / BlockLdgVectorsL); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x / LdgVectorItems, // l-coordinate - matrix_block_item_coords.y); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y); - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = 
block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int ldgvec_idx = thread_ldgvec_l + (thread_ldgvec_k * ThreadLdgVectorsL); - - // Set predicate guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index of ldg_vector_t load - int ldgvec_idx = (thread_ldgvec_k * ThreadLdgVectorsL) + thread_ldgvec_l; - - // 
Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_ldgvec_k][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (thread_ldgvec_k * StripmineLdgVectorsK * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += (matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - --wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. 
- */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= BlockDpVectorsL, "Row stride must be >= tile width."); - - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - thread_tile[thread_ldgvec_k][thread_ldgvec_l].store( - &scratch_tile[block_ldgvec_k][block_ldgvec_l * LdgVectorDpVectors]); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_loader_congruous_idp4.h b/cutlass/gemm/block_loader_congruous_idp4.h deleted file mode 100644 index 686da1dbf1..0000000000 --- a/cutlass/gemm/block_loader_congruous_idp4.h +++ /dev/null @@ -1,544 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CongruousCopy + idp4 specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CongruousCopy + idp4 specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared block-wide tile is - * a row-major (L-major) tiling of int32_t dp_vector_t, which are themselves - * column-major (K-major) vectors of int8_t value_t. 
Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is also L-major. This constitutes a CongruousCopy between - * the L-major global tile and the L-major shared tile. - * - * NB: The K-major value_t in shared dp_vector_t are imperfectly congruous - * with the L-major value_t in global memory. As a result, the granularity - * of data transfer is a "dp-square" of (DpVectorItems * DpVectorItems) values - * that must be transposed from L-oriented dp_vector_t to K-oriented - * dp_vector_t prior to commitment. - * - * NB: Consecutive threads within a block are mapped in L-major - * fashion across a first-set of squares within their global tile. Successive - * sets of squares are then strip-mined as necessary down the K-axis. These - * discontiguous squares comprise the thread's "slice" of the block-wide tile. - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int _BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int _BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader< - BlockThreads, - _BlockDpVectorsK, - _BlockDpVectorsL, - int8_t, ///< Input matrix value type (idp4 specialization) - LeadingDimAlignBytes, - AllowRaggedTiles, - int32_t, ///< Dot-product vector type along the K-axis (idp4 specialization) - load_algorithm::CongruousCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CrosswiseCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - 
//------------------------------------------------------------------------- - - /// Input matrix value type - typedef int8_t value_t; - - /// Dot-product vector type along the K-axis - typedef int32_t dp_vector_t; - - enum - { - /// Number of value_t in a dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - BlockDpVectors = _BlockDpVectorsK * _BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - - /// Number of dp_vector_t in a dp-square - SquareDpVectors = DpVectorItems, - - /// Number of dp-square tiles in a thread-tile - ThreadSquares = divide_assert::value, - - /// Extent of block-wide tile in transposed dp_vector_t along the K-axis (height) - BlockTransDpVectorsK = _BlockDpVectorsK * DpVectorItems, - - /// Extent of block-wide tile in transposed dp_vector_t along the L-axis (height) - BlockTransDpVectorsL = divide_assert<_BlockDpVectorsL, DpVectorItems>::value, - - - }; - - /// Load-from-global data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadSquares or BlockTransDpVectorsL - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadSquares, BlockTransDpVectorsL), - LeadingDimAlignBytes> - ldg_vector_t; - - /// Store-to-shared data movement type equivalent to a dp-square - typedef io_vector< - dp_vector_t, - SquareDpVectors> - sts_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockTransDpVectorsK, - - - - /// Number of ldg_vector_t within 
each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, (BlockLdgVectorsL / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = divide_assert::value, - - /// Extent of the thread tile in dp-square tiles along K-axis - ThreadSquaresK = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads * SquareDpVectors, - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = __NV_STD_MIN(BlockLdgVectorsL, BlockThreads), - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = divide_assert::value, - - /// Extent of the stripmine tile in dp-square tiles along K-axis - StripmineSquaresK = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = LdgVectorDpVectors, - }; - - /// Predicate mask type - typedef uint32_t predicate_mask_t; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - static_assert((LeadingDimAlignBytes >= 4) && (LeadingDimAlignBytes % 4 == 0), - "Alignment for matrix operands to IGEMM must be a multiple of 4 bytes."); - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the 
input matrix (one-past) - int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within "whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadSquaresK][SquareDpVectors][ThreadLdgVectorsL]; - - - - //------------------------------------------------------------------------- - // Utility methods - //------------------------------------------------------------------------- - - - /** - * \brief Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. - * - * \par - * The bytes in the two source registers \p a and \p b are numbered from 0 to 7: - * {\p b, \p a} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each of the four bytes - * {b3, b2, b1, b0} selected in the return value, a 4-bit selector is defined within - * the four lower "nibbles" of \p index: {\p index } = {n7, n6, n5, n4, n3, n2, n1, n0} - * - * \par Snippet - * The code snippet below illustrates byte-permute. - * \par - * \code - * #include - * - * __global__ void ExampleKernel(...) 
- * { - * int a = 0x03020100; - * int b = 0x07060504; - * int index = 0x00007531; - * - * int selected = prmt(a, b, index); // 0x07050301 - * - * \endcode - * - */ - inline __device__ - int32_t prmt(int32_t a, int32_t b, unsigned int index) - { - int ret; - asm volatile("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); - return ret; - } - - - /** - * Convert a "dp-square" from L-major to K-major - */ - inline __device__ - void transpose_dp_square(dp_vector_t (&dp_square)[SquareDpVectors]) - { - // Transpose dp_vector_t squares - int32_t y = prmt(dp_square[0], dp_square[1], 0x00007362); - int32_t w = prmt(dp_square[2], dp_square[3], 0x00007362); - int32_t x = prmt(dp_square[0], dp_square[1], 0x00005140); - int32_t z = prmt(dp_square[2], dp_square[3], 0x00005140); - - dp_square[0] = prmt(x, z, 0x00005410); - dp_square[1] = prmt(x, z, 0x00007632); - dp_square[2] = prmt(y, w, 0x00005410); - dp_square[3] = prmt(y, w, 0x00007632); - } - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l / LdgVectorItems; - matrix_ldgvec_stride_k = 
matrix_items_stride_k / LdgVectorItems, - matrix_ldgvec_stride_l = matrix_items_stride_l; - - // ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - block_thread_ldgvec_coords = make_int2( - threadIdx.x % BlockLdgVectorsL, // l-coordinate - (threadIdx.x / BlockLdgVectorsL) * SquareDpVectors); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x / LdgVectorItems, // l-coordinate - matrix_block_item_coords.y); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y); - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) - { - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - // ldg_vector_t K-coordinate in block-wide tile (K-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_k = - block_thread_ldgvec_coords.y + - (thread_square_k * StripmineLdgVectorsK) + - square_dpvec; - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // L-axis strip-mining of block-tile - #pragma unroll - for (int 
thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // ldg_vector_t L-coordinate in block-wide tile (L-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int ldgvec_idx = - (thread_square_k * SquareDpVectors * ThreadLdgVectorsL) + - (square_dpvec * ThreadLdgVectorsL) + - thread_ldgvec_l; - - // Set predicate guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Each thread iterates through the ldg_vector_t in its thread tile - - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - // Iterate through ldg_vector_t in each row - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index of ldg_vector_t 
load - int ldgvec_idx = - (thread_square_k * SquareDpVectors * ThreadLdgVectorsL) + - (square_dpvec * ThreadLdgVectorsL) + - thread_ldgvec_l; - - // Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (((thread_square_k * StripmineLdgVectorsK) + square_dpvec) * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += (matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - --wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. 
- */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[_BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= _BlockDpVectorsL, "Row stride must be >= tile width."); - - // Square K-coordinate of thread tile in block-wide tile - int block_thread_square_k = block_thread_ldgvec_coords.y / SquareDpVectors; - - // Iterate through rows of squares in thread tile - #pragma unroll - for (int thread_square_k = 0; thread_square_k < ThreadSquaresK; ++thread_square_k) - { - // Square K-coordinate in block-wide tile (K-axis strip-mining of squares within block-tile) - int block_square_k = block_thread_square_k + (thread_square_k * StripmineSquaresK); - - // Iterate through ldg_vector_t in each row - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // ldg_vector_t L-coordinate in block-wide tile (L-axis strip-mining of ldg_vector_t within block-tile) - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Iterate through squares in each ldg_vector_t - #pragma unroll - for (int ldgvec_dpvec_l = 0; ldgvec_dpvec_l < LdgVectorDpVectors; ++ldgvec_dpvec_l) - { - // Square L-coordinate in block-wide tile (L-axis raking of square-slices within ldg_vector_t) - int block_square_l = (block_ldgvec_l * LdgVectorDpVectors) + ldgvec_dpvec_l; - - // Assemble square of L-major dp_vector_t from stack of slices - sts_vector_t square; - - // Iterate through rows of dp_vector_t in each square - #pragma unroll - for (int square_dpvec = 0; square_dpvec < SquareDpVectors; ++square_dpvec) - { - square.buff[square_dpvec] = thread_tile[thread_square_k][square_dpvec][thread_ldgvec_l].buff[ldgvec_dpvec_l]; - } - - // Un-transpose square from L-major to K-major - transpose_dp_square(square.buff); - - // Store dp-square - square.store(&scratch_tile[block_square_k][block_square_l * SquareDpVectors]); - } - } - } - } -}; - - -} // namespace gemm -} // namespace 
cutlass diff --git a/cutlass/gemm/block_loader_crosswise.h b/cutlass/gemm/block_loader_crosswise.h deleted file mode 100644 index 13660e6780..0000000000 --- a/cutlass/gemm/block_loader_crosswise.h +++ /dev/null @@ -1,411 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_loader (CrosswiseCopy specialization) - ******************************************************************************/ - -/** - * \brief A three-phase data loading abstraction (prefetch, commit, and - * advance) for iterating over ranges of block-wide matrix tiles. - * (CrosswiseCopy specialization) - * - * Each iteration sequence produces a KxL (height-by-width) block-wide tile of - * value_t in shared memory. The layout of the shared block-wide tile is - * a row-major (L-major) tiling of dp_vector_t items, which are themselves - * column-major (K-major) vectors of value_t. Its dimensions are: - * K = BlockDpVectorsK * (sizeof(dp_vector_t) / sizeof(value_t) - * L = BlockDpVectorsL - * - * The data is copied from a corresponding tile of global matrix data whose - * layout of value_t is K-major. This constitutes a CrosswiseCopy between - * the K-major global tile and the L-major shared tile. - * - * NB: The orientation of dp_vector_t components in shared memory is congruous - * with the global matrix data, so we can use dp_vector_t as the minimum - * granularity of data transfer without any intermediate {dis|re}assembly - * of its value_t components. However, the global and shared memory layouts - * of dp_vector_t items are cross-wise with respect to each other, so any - * further LDG-vectorization of dp_vector_t data requires intermediate - * disassembly into dp_vector_t components to be stored individually into - * the shared tile. - * - * NB: Consecutive threads within a block are mapped in K-major - * fashion down a first set of LDG-vectors of dp_vector_t within their global - * tile. 
Successive sets of LDG-vectors are then strip-mined as necessary - * across the L-axis. These discontiguous LDG-vectors comprise the thread's - * "slice" of the block-wide tile. - */ -template < - int BlockThreads, ///< Number of threads in each thread block (blockDim.x) - int BlockDpVectorsK, ///< Extent of block-wide tile in dp_vector_t along the K-axis (height) - int BlockDpVectorsL, ///< Extent of block-wide tile in dp_vector_t along the L-axis (width) - typename value_t, ///< Input matrix value type - int LeadingDimAlignBytes, ///< Byte alignment of input matrix leading dimension - bool AllowRaggedTiles, ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions - typename dp_vector_t> ///< Dot-product vector type along the K-axis -struct block_loader< - BlockThreads, - BlockDpVectorsK, - BlockDpVectorsL, - value_t, - LeadingDimAlignBytes, - AllowRaggedTiles, - dp_vector_t, - load_algorithm::CrosswiseCopy> ///< Algorithm for loading a shared tile of KxL matrix data (CrosswiseCopy specialization) -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of value_t in a dp_vector_t - DpVectorItems = divide_assert::value, - - /// Number of dp_vector_t in a block-wide tile - BlockDpVectors = BlockDpVectorsK * BlockDpVectorsL, - - /// Number of dp_vector_t in a thread-tile - ThreadDpVectors = divide_assert::value, - }; - - /// Data movement type, coarsened by LeadingDimAlignBytes, capped by the - /// smaller of either ThreadDpVectors or BlockDpVectorsK - typedef io_vector< - dp_vector_t, - __NV_STD_MIN(ThreadDpVectors, BlockDpVectorsK), - LeadingDimAlignBytes> - ldg_vector_t; - - enum - { - /// Number of dp_vector_t per ldg_vector_t - LdgVectorDpVectors = ldg_vector_t::VectorItems, - - /// Number of value_t per ldg_vector_t - LdgVectorItems = LdgVectorDpVectors * 
DpVectorItems, - - - - /// Total number of ldg_vector_t within each block-wide tile - BlockLdgVectors = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = divide_assert::value, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = BlockDpVectorsL, - - - - /// Number of ldg_vector_t within each thread-tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along K-axis - ThreadLdgVectorsK = __NV_STD_MAX(1, (BlockLdgVectorsK / BlockThreads)), - - /// Extent of the thread tile in ldg_vector_t along L-axis - ThreadLdgVectorsL = divide_assert::value, - - - - /// Number of ldg_vector_t within each stripmine-tile - StripmineLdgVectors = BlockThreads, - - /// Extent of the stripmine tile in ldg_vector_t along K-axis - StripmineLdgVectorsK = __NV_STD_MIN(BlockLdgVectorsK, StripmineLdgVectors), - - /// Extent of the stripmine tile in ldg_vector_t along L-axis - StripmineLdgVectorsL = divide_assert::value, - - - - /// Alignment in dp_vector_t along L needed for committing prefetch - AlignmentDpVectorsL = 1, - }; - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - static_assert( - (ThreadLdgVectors <= sizeof(predicate_mask_t) * 8), - "Predicate mask type does not contain enough bits for encoding load predicates"); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Input pointer to matrix in ldg_vector_t - ldg_vector_t *d_matrix_ldgvecs; - - /// Extent of the input matrix in ldg_vector_t along the L-axis - int matrix_ldgvecs_l; - - /// Thread block's ending ldg_vector_t coordinate (k) within the input matrix (one-past) 
- int block_end_ldgvec_k; - - /// Predicate bits for guarding ldg_vector_t loads within "whole-k" block-wide tiles - predicate_mask_t guard; - - /// Predicate bits for guarding ldg_vector_t loads within the final block-wide "residue" tile - predicate_mask_t residue_guard; - - /// Iteration span in "whole-k" block-wide tiles - int wholek_tiles_remaining; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_ldgvec_stride_k; - - /// Distance in ldg_vector_t within pitched-linear memory between successive coordinates along the L-axis - int matrix_ldgvec_stride_l; - - /// ldg_vector_t coordinates (l, k) of thread-tile within the block-wide tile - int2 block_thread_ldgvec_coords; - - /// Thread-wide tile of prefetch data - ldg_vector_t thread_tile[ThreadLdgVectorsK][ThreadLdgVectorsL]; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader( - value_t *d_matrix_items, ///< Input pointer to matrix in value_t - int matrix_items_l, ///< Extent of the input matrix in value_t along the L-axis - int matrix_items_stride_k, ///< Distance in value_t within pitched-linear memory between successive coordinates along the K-axis - int matrix_items_stride_l, ///< Distance in value_t within pitched-linear memory between successive coordinates along the L-axis - int2 matrix_block_item_coords, ///< value_t coordinates (l, k) of first block-wide tile within the input matrix - int block_end_item_k) ///< Thread block's ending coordinate (k) within the input matrix (one-past) - : - block_end_ldgvec_k(block_end_item_k), - guard(0), - residue_guard(0) - { - matrix_ldgvecs_l = matrix_items_l; - matrix_ldgvec_stride_k = matrix_items_stride_k; - matrix_ldgvec_stride_l = (matrix_items_stride_l / LdgVectorItems); - - // ldg_vector_t coordinates (l, k) 
of thread-tile within the block-wide tile - block_thread_ldgvec_coords = make_int2( - (threadIdx.x / BlockLdgVectorsK), // l-coordinate - (threadIdx.x % BlockLdgVectorsK)); // k-coordinate - - // ldg_vector_t coordinates (l, k) of first block-wide tile within the input matrix - int2 matrix_block_ldgvec_coords = make_int2( - matrix_block_item_coords.x, // l-coordinate - matrix_block_item_coords.y / LdgVectorItems); // k-coordinate - - // Iteration span in ldg_vector_t - int span_ldgvec_k = (block_end_item_k - matrix_block_item_coords.y) / LdgVectorItems; - - - - // ldg_vector_t coordinates (l, k) of first thread-tile tile within the input matrix - int2 matrix_thread_ldgvec_coords = make_int2( - block_thread_ldgvec_coords.x + matrix_block_ldgvec_coords.x, - block_thread_ldgvec_coords.y + matrix_block_ldgvec_coords.y); - - // Iteration range in "whole-k" block-wide tiles - wholek_tiles_remaining = span_ldgvec_k / BlockLdgVectorsK; - - // Extent of final residue-tile in ldg_vector_t along K-axis - int residue_ldgvecs_k = span_ldgvec_k % BlockLdgVectorsK; - - // Initialize I/O predicates - if (AllowRaggedTiles) - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Whether block_ldgvec_coords.y is valid in the final residue tile - predicate_mask_t valid_k = (block_ldgvec_k < residue_ldgvecs_k); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Whether block_ldgvec_coords.x is valid any block-wide tile - predicate_mask_t valid_l = (matrix_block_ldgvec_coords.x + block_ldgvec_l < matrix_ldgvecs_l); - - // Linear index of ldg_vector_t load - int 
ldgvec_idx = thread_ldgvec_l + (thread_ldgvec_k * ThreadLdgVectorsL); - - // Set predicate guard bits - guard |= (valid_l << ldgvec_idx); - residue_guard |= ((valid_l & valid_k) << ldgvec_idx); - } - } - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - - // Update the input pointer to be matrix_thread_ldgvec_coords - this->d_matrix_ldgvecs = - reinterpret_cast(d_matrix_items) + - (matrix_thread_ldgvec_coords.y * matrix_ldgvec_stride_k) + - (matrix_thread_ldgvec_coords.x * matrix_ldgvec_stride_l); - } - - - //------------------------------------------------------------------------- - // Loader API - //------------------------------------------------------------------------- - - /** - * Request the current block-wide tile - */ - inline __device__ - void request() - { - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - // Linear index of ldg_vector_t load - int ldgvec_idx = (thread_ldgvec_k * ThreadLdgVectorsL) + thread_ldgvec_l; - - // Unpack predicate guard - predicate_mask_t valid = ((guard >> ldgvec_idx) & 1); - - if (!AllowRaggedTiles || valid) - { - // Perform load - thread_tile[thread_ldgvec_k][thread_ldgvec_l].load( - d_matrix_ldgvecs + - (thread_ldgvec_k * StripmineLdgVectorsK * matrix_ldgvec_stride_k) + - (thread_ldgvec_l * StripmineLdgVectorsL * matrix_ldgvec_stride_l)); - } - else - { - // Zero-initialize - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec] = 0; - } - } - } - } - - - /** - * Advance the loader to the next block-wide tile in the K-axis - */ - inline __device__ - void next() - { - d_matrix_ldgvecs += 
(matrix_ldgvec_stride_k * BlockLdgVectorsK); - - if (AllowRaggedTiles) - { - --wholek_tiles_remaining; - - // Promote residue-guard to primary-guard if no full tiles remain - if (!wholek_tiles_remaining) - { - guard = residue_guard; - } - } - } - - - /** - * Commit the previously-requested block-wide tile to shared memory - * - * NB: To facilitate padding for avoiding shared memory bank conflicts, we - * allow the row stride SmemDpVectorsL to be arbitrarily bigger than the - * tile width BlockDpVectorsL. - */ - template - inline __device__ - void commit( - dp_vector_t (&scratch_tile)[BlockDpVectorsK][SmemDpVectorsL]) - { - static_assert(SmemDpVectorsL >= BlockDpVectorsL, "Row stride must be >= tile width."); - - // Outer thread-tile ldg_vector_t iteration (K-axis) - #pragma unroll - for (int thread_ldgvec_k = 0; thread_ldgvec_k < ThreadLdgVectorsK; ++thread_ldgvec_k) - { - int block_ldgvec_k = block_thread_ldgvec_coords.y + (thread_ldgvec_k * StripmineLdgVectorsK); - - // Inner thread-tile ldg_vector_t iteration (L-axis) - #pragma unroll - for (int thread_ldgvec_l = 0; thread_ldgvec_l < ThreadLdgVectorsL; ++thread_ldgvec_l) - { - int block_ldgvec_l = block_thread_ldgvec_coords.x + (thread_ldgvec_l * StripmineLdgVectorsL); - - // Write column of dp_vector_t - #pragma unroll - for (int dpvec = 0; dpvec < LdgVectorDpVectors; ++dpvec) - { - scratch_tile[(block_ldgvec_k * LdgVectorDpVectors) + dpvec][block_ldgvec_l] = - thread_tile[thread_ldgvec_k][thread_ldgvec_l].buff[dpvec]; - } - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_loader_wmma.h b/cutlass/gemm/block_loader_wmma.h deleted file mode 100644 index 5b586a1bd0..0000000000 --- a/cutlass/gemm/block_loader_wmma.h +++ /dev/null @@ -1,322 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Tile-loading abstraction for thread blocks - */ - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/** - * block-wide tile loader supporting congruous mapping of data from source and - * destination addressable storage. Typically, this will be used to load a - * block-wide tile from global memory into shared memory. 
- * - * This enables the caller to specify MatrixAlignBytes guarantees of the input pointer - * and performs memory operations on vectors. This increases the efficiency of - * memory operations and reduces the number of guard predicates needed. - * - */ -template < - bool congruous, ///< Indicates whether the "GEMM K" dimension refers to strided matrix dimension - int BlockThreads, ///< Number of threads participating in the streaming operation - int BlockItemsL, ///< Extent of block-wide tile in value_t along the L-axis (width) - int BlockItemsK, ///< Extent of block-wide tile in value_t along the K-axis (height) - typename value_t, ///< Input matrix value type - int MatrixAlignBytes, ///< Byte alignment of input matrix - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_loader_wmma -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Predicate bit vector - typedef uint64_t predicate_mask_t; - - /// Data movement type, coarsened by MatrixAlignBytes - typedef io_vector< - value_t, - divide_assert::value, - MatrixAlignBytes> - ldg_vector_t; - - enum - { - /// Number of items per ldg_vector_t - LdgVectorItems = ldg_vector_t::VectorItems, - - /// Total number of ldg_vector_t within the block-wide tile - BlockLdgVectors = divide_assert<(BlockItemsL * BlockItemsK), LdgVectorItems>::value, - - /// Extent of the block-wide tile in ldg_vector_t along K-axis - BlockLdgVectorsK = BlockItemsK, - - /// Extent of the block-wide tile in ldg_vector_t along L-axis - BlockLdgVectorsL = divide_assert::value, - - /// Number of ldg_vector_t within each thread tile - ThreadLdgVectors = divide_assert::value, - - /// Extent of the thread tile in ldg_vector_t along the L-axis - ThreadLdgVectorsL = __NV_STD_MAX(1, BlockLdgVectorsL / BlockThreads), - - /// 
Block-wide strip-mining distance between ldg_vector_t along the K-axis - BlockLdgVectorStrideK = __NV_STD_MAX(1, BlockThreads / BlockLdgVectorsL), - - /// Extent of the thread tile in ldg_vector_t along the K-axis - ThreadLdgVectorsK = divide_assert::value, - }; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - /// Define assertions - static_assert(ThreadLdgVectorsL * ThreadLdgVectorsK == ThreadLdgVectors, - "Number of vectors must be fully covered by the thread's 2D vector tile."); - - /// Predicate masks must be large enough to guard every vector load - static_assert(sizeof(predicate_mask_t) * 8 >= ThreadLdgVectorsL * ThreadLdgVectorsK, - "Predicate bit vector must be large enough to guard every vector load."); - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// pointer to tile in global memory - const ldg_vector_t *ptr; - - /// stride of the matrix in the K-axis - int matrix_values_stride_k; - - /// Guard predicate - predicate_mask_t guard; - - /// Guard for the last request iteration - predicate_mask_t residue_guard; - - /// Number of 'whole' request iterations before encountering the residue - int request_iterations; - - /// fetch registers - ldg_vector_t fetch[ThreadLdgVectors]; - - /// Thread's base offset from the start of a block-wide tile - int thread_offset_l; - - /// Thread's basae offset from the start of a block-wide tile - int thread_offset_k; - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_loader_wmma( - const value_t *d_matrix, ///< Pointer to input matrix - int matrix_values_l, ///< Extent of the input matrix 
in value_t along the L-axis - int start_l, ///< Starting location in tile - int dim_k, ///< Inner dimension of tile, used for computing guard predicates - int _matrix_values_stride_k, ///< Stride of K-axis of atrix - int start_k, ///< Tile's starting location - int2 block_begin_item_coords) ///< Thread block's starting value_t coordinates (l, k) within the input matrix - : - ptr(reinterpret_cast(d_matrix)), - matrix_values_stride_k(_matrix_values_stride_k / LdgVectorItems), - guard(0), - residue_guard(0) - { - // Compute block's starting coordinates in units of vectors - int block_base_l = block_begin_item_coords.x / LdgVectorItems; - int block_base_k = block_begin_item_coords.y; - - // Compute a thread tiling of the block-wide tile - int tid = threadIdx.x; - thread_offset_l = tid % BlockLdgVectorsL; - thread_offset_k = tid / BlockLdgVectorsL; - - // Add the block and thread offsets to the source pointer - ptr += (block_base_l + thread_offset_l) + - (block_base_k + thread_offset_k) * matrix_values_stride_k; - - // When AllowRaggedTiles support is enabled, compute a bit vector of guard - // predicates - if (AllowRaggedTiles) - { - if (congruous) - { - request_iterations = (dim_k - start_k) / BlockItemsK; - } - else - { - request_iterations = (matrix_values_l - start_l) / BlockItemsL; - } - - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int item = l_idx + k_idx * ThreadLdgVectorsL; - - // Global vector L and K indices - int vec_l = l_idx * BlockThreads; - int vec_k = k_idx * BlockLdgVectorStrideK; - - predicate_mask_t pred; - predicate_mask_t residue_pred; - - if (congruous) - { - pred = (((block_base_l + thread_offset_l + vec_l) * LdgVectorItems < matrix_values_l) ? 1 : 0); - residue_pred = ((block_base_k + thread_offset_k + vec_k < (dim_k % BlockItemsK)) ? 1 : 0); - } - else - { - pred = ((block_base_k + thread_offset_k + vec_k < dim_k) ? 
1 : 0); - residue_pred = (((block_base_l + thread_offset_l + vec_l) * LdgVectorItems < (matrix_values_l % BlockItemsL)) ? 1 : 0); - } - - // Update the guard and residue_guard word with predicate bits - guard |= (pred << item); - residue_guard |= (residue_pred << item); - } - } - - // If there are zero full request iterations, compute the intersection - // with the residue guard. - if (!request_iterations) - { - guard &= residue_guard; - } - } - } - - - - /** - * Request the current block-wide tile from source memory - */ - inline __device__ - void request() - { - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int load_idx = l_idx + (k_idx * ThreadLdgVectorsL); - bool pred = !AllowRaggedTiles || (guard & (predicate_mask_t(1) << load_idx)); - if (pred) - { - fetch[load_idx].load( - ptr + - (k_idx * BlockLdgVectorStrideK * matrix_values_stride_k) + (l_idx * BlockThreads)); - } - else - { - #pragma unroll - for (int elem_idx = 0; elem_idx < LdgVectorItems; ++elem_idx) - { - fetch[load_idx].buff[elem_idx] = 0; - } - } - } - } - } - - - /// Advance to the next block-wide tile - inline __device__ - void next() - { - if (congruous) - { - ptr += BlockItemsK * matrix_values_stride_k; - } - else - { - ptr += BlockLdgVectorsL; - } - - // Track number of full iterations to intersect with the residue guard predicates. - if (AllowRaggedTiles) - { - --request_iterations; - if (!request_iterations) - { - guard &= residue_guard; - } - } - } - - - /// Commit the values to the scratch tile to destination memory. 
- template - inline __device__ - void commit(value_t *scratch_tile) - { - static_assert(SmemStride % LdgVectorItems == 0, - "SMEM stride must be divisible by the size of vector loads"); - - ldg_vector_t *smem_ptr = reinterpret_cast(scratch_tile); - smem_ptr += thread_offset_l + thread_offset_k * SmemStride / LdgVectorItems; - - #pragma unroll - for (int k_idx = 0; k_idx < ThreadLdgVectorsK; ++k_idx) - { - #pragma unroll - for (int l_idx = 0; l_idx < ThreadLdgVectorsL; ++l_idx) - { - int load_idx = l_idx + (k_idx * ThreadLdgVectorsL); - - fetch[load_idx].store(smem_ptr + - (k_idx * BlockLdgVectorStrideK * SmemStride / LdgVectorItems) + - (l_idx * BlockThreads)); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass - diff --git a/cutlass/gemm/block_task.h b/cutlass/gemm/block_task.h deleted file mode 100644 index 3940fb6ecf..0000000000 --- a/cutlass/gemm/block_task.h +++ /dev/null @@ -1,677 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * A block-wide task abstraction for computing device-wide GEMM - */ - -#include - -#include "../util/util.h" - -#include "grid_raster.h" -#include "block_loader.h" -#include "k_split_control.h" -#include "thread_accumulator.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_task_policy - ******************************************************************************/ - -/** - * \brief Parameterizable tuning policy for \p block_task - * - * Once parameterized, \p block_task_policy provides the member constant - * \p BlockThreads indicating to the required thread block size - */ -template < - int _BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int _BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - int _BlockItemsK, ///< Extent of block-wide A|B tiles in value_t along the K-axis - int _ThreadItemsY, ///< Height in rows of a thread tile in C - int _ThreadItemsX, ///< Width in columns of a thread tile in C - bool _UseDoubleScratchTiles, ///< Whether to halve 
synchronization overhead at the expense of doubled shared memory and addressing overhead - grid_raster_strategy::kind_t _RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct block_task_policy -{ - enum - { - /// Height in rows of a block-wide tile in matrix C - BlockItemsY = _BlockItemsY, - - /// Width in columns of a block-wide tile in matrix C - BlockItemsX = _BlockItemsX, - - /// Height in rows of a thread tile in C - ThreadItemsY = _ThreadItemsY, - - /// Width in columns of a thread tile in C - ThreadItemsX = _ThreadItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = _BlockItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = _UseDoubleScratchTiles, - - /// Number of threads in each thread block (blockDim.x) - BlockThreads = divide_assert< - (BlockItemsY * BlockItemsX), - (ThreadItemsY * ThreadItemsX)>::value, - }; - - /// Strategy for enumerating \p block_task within an input matrix - static const grid_raster_strategy::kind_t RasterStrategy = _RasterStrategy; -}; - - -/****************************************************************************** - * block_task - ******************************************************************************/ - -/** - * \brief A block-wide task abstraction for computing device-wide GEMM - * - * Each thread_block is assigned a unique tile of output matrix C to compute by - * consuming the corresponding stripes of the input matrices A and B. 
- */ -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_task -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of threads in each thread block (blockDim.x) - BlockThreads = block_task_policy_t::BlockThreads, - - /// Extent of thread tile in value_t along M-axis - ThreadItemsY = block_task_policy_t::ThreadItemsY, - - /// Extent of thread tile in value_t along N-axis - ThreadItemsX = block_task_policy_t::ThreadItemsX, - }; - - /// Accumulator type - typedef thread_accumulator< - ThreadItemsY, - ThreadItemsX, - value_t, - accum_t> - thread_accumulator_t; - - /// Dot-product vector type along the K-axis (e.g, uchar4 when using IDP4A) - typedef typename thread_accumulator_t::dp_vector_t dp_vector_t; - - enum - { - /// Whether this is a small, latency-bound tile - IsSmallTile = (ThreadItemsY < 4) && (ThreadItemsX < 4), - - /// Number of value_t in dp_vector_t - DpVectorItems = divide_assert::value, - - /// Extent of block-wide C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - BlockItemsY = block_task_policy_t::BlockItemsY, - - /// Extent of block-wide C-tile in accum_t (and B-tiles in 
value_t) along N-axis (width) - BlockItemsX = block_task_policy_t::BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = block_task_policy_t::BlockItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = block_task_policy_t::UseDoubleScratchTiles, - - /// Extent of block-wide A|B tiles in dp_vector_t along the K-axis - BlockDpVectorsK = divide_assert::value, - - /// Number of dp_vector_t along M-axis that can be read in a single LDS from the shared A-tile (up to 128b if more than one value_t) - LdsVectorDpVectorsA = __NV_STD_MIN( - ThreadItemsY, - __NV_STD_MAX(1, (128 / (__NV_STD_MAX(sizeof(dp_vector_t), sizeof(accum_t)) * 8)))), - - /// Number of dp_vector_t along N-axis that can be read in a single LDS from the shared B-tile (up to 128b if more than one value_t) - LdsVectorDpVectorsB = __NV_STD_MIN( - ThreadItemsX, - __NV_STD_MAX(1, (128 / (__NV_STD_MAX(sizeof(dp_vector_t), sizeof(accum_t)) * 8)))), - - /// Number of strip-mined LDS vector reads from shared A-tile - ThreadLdsVectorsA = divide_assert::value, - - /// Number of strip-mined LDS vector reads from shared B-tile - ThreadLdsVectorsB = divide_assert::value, - - /// Number of elements in one LDG/STG vector of C-tile - ThreadLdgVectorSizeC = __NV_STD_MIN(LdgAlignC, 16) / (sizeof(accum_t)), - - /// Number of threads in warp - WarpThreads = 32, - - /// Extent of warp in threads along the M-axis - WarpThreadsY = (BlockItemsY > BlockItemsX) ? 
8 : 4, - - /// Extent of warp in threads along the N-axis - WarpThreadsX = divide_assert::value, - - /// Extent of warp-wide tile in items along the M-axis - WarpItemsY = WarpThreadsY * ThreadItemsY, - - /// Extent of warp-wide tile in items along the N-axis - WarpItemsX = WarpThreadsX * ThreadItemsX, - - /// Extent of block in warps along M-axis - BlockWarpsY = divide_assert::value, - - /// Extent of block in warps along N-axis - BlockWarpsX = divide_assert::value, - }; - - /// Load-from-shared data movement type for A-tile, coarsened by LdsVectorDpVectorsA - typedef io_vector lds_vector_a_t; - - /// Load-from-shared data movement type for B-tile, coarsened by LdsVectorDpVectorsB - typedef io_vector lds_vector_b_t; - - /// Thread block rasterization helper type - typedef grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - - /// Tile loader type for matrix A - typedef block_loader< - BlockThreads, // BlockThreads - BlockDpVectorsK, // BlockDpVectorsK - BlockItemsY, // BlockItemsL - value_t, // value_t - LdgAlignA, // MatrixAlignBytes - AllowRaggedTiles, // AllowRaggedTiles - dp_vector_t, // dp_vector_t - (TransformA == matrix_transform_t::NonTranspose) ? // LoadAlgorithm - load_algorithm::CongruousCopy : - load_algorithm::CrosswiseCopy> - block_loader_a_t; - - - /// Tile loader type for matrix B - typedef block_loader< - BlockThreads, // BlockThreads - BlockDpVectorsK, // BlockDpVectorsK - BlockItemsX, // BlockItemsL - value_t, // value_t - LdgAlignB, // MatrixAlignBytes - AllowRaggedTiles, // AllowRaggedTiles - dp_vector_t, // dp_vector_t - (TransformB == matrix_transform_t::NonTranspose) ? // LoadAlgorithm - load_algorithm::CrosswiseCopy : - load_algorithm::CongruousCopy> - block_loader_b_t; - - - enum - { - /// Number of value_t to pad the end of each row of the shared A-tile - PadItemsA = (TransformA == matrix_transform_t::NonTranspose) ? 
- __NV_STD_MAX(LdsVectorDpVectorsA, block_loader_a_t::AlignmentDpVectorsL) : - LdsVectorDpVectorsA, - - /// Number of value_t to pad the end of each row of the shared B-tile - PadItemsB = (TransformB == matrix_transform_t::NonTranspose) ? - LdsVectorDpVectorsB : - __NV_STD_MAX(LdsVectorDpVectorsB, block_loader_b_t::AlignmentDpVectorsL), - }; - - - /// Shared memory layout for a prefetch page - struct page_storage_t - { - /// Tile of A - dp_vector_t __align__(16) block_a[BlockDpVectorsK][BlockItemsY + PadItemsA]; - - /// Tile of B - dp_vector_t __align__(16) block_b[BlockDpVectorsK][BlockItemsX + PadItemsB]; - }; - - - /// Shared memory layout for scratch storage - struct scratch_storage_t - { - /// Prefetch pages - page_storage_t pages[UseDoubleScratchTiles ? 2 : 1]; - - /// Accumulator shared scratch - typename thread_accumulator_t::scratch_storage_t accum_scratch; - }; - - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - // Ensure we have at least two unrolled innermost loop iterations (one to prefetch - // the next global tile and then one to prefetch the first strip of it from shared) - static_assert ((BlockDpVectorsK >= 2), "BlockDpVectorsK must be >= 2."); - - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Scratch storage reference - scratch_storage_t *scratch; - - /// Which page of scratch tiles we're currently reading from - int page_idx; - - /// Pointer to matrix C - accum_t *d_c; - - /// Epilogue operation applied to update matrix C - epilogue_op_t epilogue_op; - - /// Matrix height in rows of trans_op(A) and C - int dim_m; - - /// Matrix width in columns of trans_op(B) and C - int dim_n; - - /// Control for inter-block k-splitting - k_split_control k_split; - - /// Thread block's base value_t 
coordinates (m, n) in matrix C - grid_raster_t grid_raster; - - /// Thread block's current coordinate (k) within A|B matrices - int block_item_coords_k; - - /// Thread block's ending coordinate (k) within A|B matrices (one-past) - int block_end_item_k; - - /// Warp's coordinates (x, y) in thread block - int2 block_warp_coords; - - /// Thread's coordinates (x, y) in warp - int2 warp_thread_coords; - - /// Thread's base item offset within strip of A tile - int thread_strip_offset_a; - - /// Thread's base item offset within strip of B tile - int thread_strip_offset_b; - - /// Thread's active-k/prefetch-k slices from shared A tile - lds_vector_a_t local_slices_a[2][ThreadLdsVectorsA]; - - /// Thread's active-k/prefetch-k slices from shared B tile - lds_vector_b_t local_slices_b[2][ThreadLdsVectorsB]; - - /// A tile loader - block_loader_a_t loader_a; - - /// B tile loader - block_loader_b_t loader_b; - - /// C tile accumulator - thread_accumulator_t accumulator; - - - //------------------------------------------------------------------------- - // Coordinate system helpers - //------------------------------------------------------------------------- - - /// Compute the warp's coordinates (x, y) in thread block - inline __device__ - int2 warp_coords() - { - int warp_id = threadIdx.x / WarpThreads; - return make_int2( - warp_id % BlockWarpsX, - warp_id / BlockWarpsX); - } - - - /// Compute the thread's lane-coordinates (x, y) in warp - inline __device__ - int2 thread_coords() - { - int lane_id = threadIdx.x % WarpThreads; - - // Maxwell+ mapping of threads within a 2D warp for maximal LDS bandwidth - return make_int2( - lane_id / WarpThreadsY, - lane_id % WarpThreadsY); - } - - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_task( - scratch_storage_t *scratch, - value_t *d_a, - value_t *d_b, - 
accum_t *d_c, - epilogue_op_t epilogue_op, - int dim_m, - int dim_n, - int dim_k, - k_split_control k_split) - : - scratch(scratch), - page_idx(0), - d_c(d_c), - epilogue_op(epilogue_op), - dim_m(dim_m), - dim_n(dim_n), - k_split(k_split), - block_item_coords_k(k_split.block_begin_item_k()), - block_end_item_k(k_split.block_end_item_k(dim_k)), - block_warp_coords(warp_coords()), - warp_thread_coords(thread_coords()), - thread_strip_offset_a((warp_thread_coords.y * LdsVectorDpVectorsA) + (block_warp_coords.y * WarpItemsY)), - thread_strip_offset_b((warp_thread_coords.x * LdsVectorDpVectorsB) + (block_warp_coords.x * WarpItemsX)), - - loader_a( - d_a, // d_matrix - dim_m, // matrix_values_l - (TransformA == matrix_transform_t::NonTranspose) ? dim_m : 1, // matrix_values_stride_k - (TransformA == matrix_transform_t::NonTranspose) ? 1 : dim_k, // matrix_values_stride_l - make_int2( // block_begin_item_coords - grid_raster.block_item_coords.y, - block_item_coords_k), - block_end_item_k), // block_end_item_k - - loader_b( - d_b, // d_matrix - dim_n, // matrix_values_l - (TransformB == matrix_transform_t::NonTranspose) ? 1 : dim_n, // matrix_values_stride_k - (TransformB == matrix_transform_t::NonTranspose) ? 
dim_k : 1, // matrix_values_stride_l - make_int2( // block_begin_item_coords - grid_raster.block_item_coords.x, - block_item_coords_k), - block_end_item_k), // block_end_item_k - - accumulator(scratch->accum_scratch) - {} - - - //------------------------------------------------------------------------- - // Prefetching utility methods - //------------------------------------------------------------------------- - - /** - * Request the calling thread's slices of the shared tiles at depth \p tile_offset_k - */ - inline __device__ void request_local_prefetch( - lds_vector_a_t (&slice_a)[ThreadLdsVectorsA], ///< Slice from A - lds_vector_b_t (&slice_b)[ThreadLdsVectorsB], ///< Slice from B - int tile_offset_k) - { - // Load B strip - for (int i = 0; i < ThreadLdsVectorsB; ++i) - { - slice_b[i].load( - &scratch->pages[page_idx].block_b[tile_offset_k][thread_strip_offset_b + (i * WarpThreadsX * LdsVectorDpVectorsB)]); - } - - // Load A strip - for (int i = 0; i < ThreadLdsVectorsA; ++i) - { - slice_a[i].load( - &scratch->pages[page_idx].block_a[tile_offset_k][thread_strip_offset_a + (i * WarpThreadsY * LdsVectorDpVectorsA)]); - } - } - - - //------------------------------------------------------------------------- - // Epilogue - //------------------------------------------------------------------------- - - /** - * Performs the GEMM epilogue: - * - Applies the scalar multipliers and addends to the accumulators - * - Write the result to the output matrix - */ - __forceinline__ __device__ - void epilogue() - { - // Wait for predecessor thread block(s) to produce block-wide tile of - // exclsuive partial-sums - k_split.wait(); - - // Configure epilogue as to whether the thread block is a secondary - // accumulator in an inter-block k-splitting scheme - if (k_split.is_secondary_accumulator()) - epilogue_op.set_secondary_accumulator(); - - // Whether the addend from C needs loading - bool must_init_addend = epilogue_op.must_init_addend(); - - #pragma unroll - for (int x = 0; 
x < ThreadItemsX; ++x) - { - #pragma unroll - for (int y = 0; y < ThreadItemsY; y += LdsVectorDpVectorsA) - { - int thread_strip_b = x / LdsVectorDpVectorsB; - int thread_strip_a = y / LdsVectorDpVectorsA; - - int thread_item_coords_tile_x = thread_strip_offset_b + (thread_strip_b * WarpThreadsX * LdsVectorDpVectorsB) + (x % LdsVectorDpVectorsB); - int thread_item_coords_tile_y = thread_strip_offset_a + (thread_strip_a * WarpThreadsY * LdsVectorDpVectorsA) + (y % LdsVectorDpVectorsA); - - int c_idx = (grid_raster.block_item_coords.x + thread_item_coords_tile_x) * dim_m + - grid_raster.block_item_coords.y + thread_item_coords_tile_y; - - accum_t *my_c = d_c + c_idx; - - #pragma unroll - for (int i = 0; i < LdsVectorDpVectorsA; ++i) - { - accum_t c_slice = accum_t(0); - accum_t *c_ptr = my_c + i; - - if ((grid_raster.block_item_coords.x + thread_item_coords_tile_x) < dim_n && - (grid_raster.block_item_coords.y + thread_item_coords_tile_y + i) < dim_m) - { - if (must_init_addend) - { - ldg_cg(c_slice, c_ptr); - } - - c_slice = epilogue_op(accumulator.get(x, y + i), c_slice, c_idx + i); - - stg_cg(c_ptr, c_slice); - } - } - } - } - - // Signal k-split successor thread_block that we have produced our block-wide - // tile of inclusive partial-sums - k_split.signal(); - } - - - //------------------------------------------------------------------------- - // Tile consumption - //------------------------------------------------------------------------- - - /** - * Consume a tile of A and B each - */ - template - __forceinline__ __device__ - void consume_tile() - { - // Unroll BlockDpVectorsK iterations of outer-product accumulations - #pragma unroll - for (int tile_offset_k = 0; tile_offset_k < BlockDpVectorsK; tile_offset_k += 1) - { - // Last strip commits global prefetch for next tile - if ((tile_offset_k == BlockDpVectorsK - 1) && DoGlobalPrefetch) - { - // If not using two pages of scratch tiles, protect the above prefetch loads from the committing writes below - if 
(!UseDoubleScratchTiles) - __syncthreads(); - - // If using two pages of scratch tiles, switch to next page before writing - if (UseDoubleScratchTiles) - { - page_idx = (page_idx ? 0 : 1); - } - - // Commit global prefetch data to scratch page - loader_a.commit(scratch->pages[page_idx].block_a); - loader_b.commit(scratch->pages[page_idx].block_b); - - __syncthreads(); - } - - // Request local prefetch for next strip - request_local_prefetch( - local_slices_a[(tile_offset_k + 1) % 2], - local_slices_b[(tile_offset_k + 1) % 2], - (tile_offset_k + 1) % BlockDpVectorsK); - - // Request global prefetch for next tile on first strip - if ((tile_offset_k == 0) && DoGlobalPrefetch) - { - loader_b.request(); - loader_b.next(); - loader_a.request(); - loader_a.next(); - } - - // Cast strip-mined loads to contiguous array of dp_vector_t - typedef dp_vector_t thread_tile_a_t[ThreadLdsVectorsA * LdsVectorDpVectorsA]; - typedef dp_vector_t thread_tile_b_t[ThreadLdsVectorsB * LdsVectorDpVectorsB]; - thread_tile_a_t &thread_tile_a = reinterpret_cast(local_slices_a[(tile_offset_k) % 2]); - thread_tile_b_t &thread_tile_b = reinterpret_cast(local_slices_b[(tile_offset_k) % 2]); - - // Accumulate this dp-stripe product - accumulator.multiply_accumulate(thread_tile_a, thread_tile_b); - } - } - - - //------------------------------------------------------------------------- - // GEMM API - //------------------------------------------------------------------------- - - /** - * Compute GEMM - */ - __forceinline__ __device__ - void run() - { - // Quit if the thread block is fully out-of-bounds - if (grid_raster.is_block_oob(dim_m, dim_n)) - { - asm volatile("exit;"); - } - - // Request global prefetch of first tile - loader_a.request(); - loader_a.next(); - loader_b.request(); - loader_b.next(); - - // Commit global prefetch of first tile to shared memory - loader_a.commit(scratch->pages[page_idx].block_a); - loader_b.commit(scratch->pages[page_idx].block_b); - - // Advance to next A,B tiles 
in K-axis - block_item_coords_k += BlockItemsK; - - // Synchronize shared tiles and prepared accumulator - __syncthreads(); - - // Initialize thread's slice of accumulators - accumulator.init(); - - // Request first iteration of local prefetch strips - request_local_prefetch( - local_slices_a[0], - local_slices_b[0], - 0); - - // - // Main loop - // - - // Consume tiles in A and B along the K-axis (all but last tile) - #pragma unroll 1 - while (block_item_coords_k < block_end_item_k) - { - consume_tile(); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - } - - // Consume last tile - consume_tile(); - - // - // Eplilogue - // - - epilogue(); - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/block_task_wmma.h b/cutlass/gemm/block_task_wmma.h deleted file mode 100644 index 03d2d222e9..0000000000 --- a/cutlass/gemm/block_task_wmma.h +++ /dev/null @@ -1,767 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file - * A block-wide task abstraction for computing device-wide GEMM - */ - -#pragma once - -// Compiler guard conditional to avoid compilation errors on versions of CUDA that -// do not support the WMMA API. 
-#if defined (WMMA) - -#include - -#include "../util/util.h" - -#include "grid_raster.h" -#include "block_loader.h" -#include "block_loader_wmma.h" -#include "wmma_accumulator.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * block_task_wmma_policy - ******************************************************************************/ - -/** - * \brief Parameterizable tuning policy for block-wide WMMA GEMM tasks - * - * Once parameterized, \p block_task_policy provides the member constant - * \p BlockThreads indicating to the required thread block size - */ -template < - int _BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int _BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - int _BlockItemsK, ///< Extent of block-wide A|B tiles in value_t along the K-axis - int _WarpItemsY, ///< Height in rows of a Warp tile's accumulators - int _WarpItemsX, ///< Width in columns of a Warp tile's accumulators - int _WmmaItemsY, ///< Height in rows of a discrete WMMA block's accumulators - int _WmmaItemsX, ///< Width in columns of a discrete WMMA block's accumulators - int _WmmaItemsK, ///< Depth of each discrete WMMA block - bool _UseDoubleScratchTiles, ///< Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - grid_raster_strategy::kind_t _RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct block_task_wmma_policy -{ - /// Strategy for enumerating \p block_task within an input matrix - static const grid_raster_strategy::kind_t RasterStrategy = _RasterStrategy; - - enum - { - /// Height in rows of a block-wide tile in matrix C - BlockItemsY = _BlockItemsY, - - /// Width in columns of a block-wide tile in matrix C - BlockItemsX = _BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = _BlockItemsK, - - /// Height in rows of a Warp 
tile's accumulators - WarpItemsX = _WarpItemsX, - - /// Width in columns of a Warp tile's accumulators - WarpItemsY = _WarpItemsY, - - /// Width in columns of a discrete WMMA block's accumulators - WmmaItemsX = _WmmaItemsX, - - /// Height in rows of a discrete WMMA block's accumulators - WmmaItemsY = _WmmaItemsY, - - /// Depth of each discrete WMMA block - WmmaItemsK = _WmmaItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = _UseDoubleScratchTiles, - - - // - // Derived quantities - // - - /// Machine warp size - WarpThreads = 32, - - /// Number of WMMA operations in the height dimension - WmmaBlocksY = divide_assert::value, - - /// Number of WMMA operations in the height dimension - WmmaBlocksX = divide_assert::value, - - /// Number of warps in each thread block - BlockWarps = divide_assert::value, - - /// Number of threads in each thread block (blockDim.x) - BlockThreads = BlockWarps * WarpThreads, - }; -}; - - -/****************************************************************************** - * block_task_wmma - ******************************************************************************/ - -/** - * \brief A block-wide task abstraction for computing device-wide GEMM - * - * Each thread_block is assigned a unique tile of output matrix C to compute by - * consuming the corresponding stripes of the input matrices A and B. 
- */ -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation to update matrix C - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether the input matrix's dimensions need not be an even-multiple of the block-wide tile dimensions -> -struct block_task_wmma -{ - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number of threads in each thread block (blockDim.x) - BlockThreads = block_task_policy_t::BlockThreads, - - /// Extent of block-wide C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - BlockItemsY = block_task_policy_t::BlockItemsY, - - /// Extent of block-wide C-tile in accum_t (and B-tiles in value_t) along N-axis (width) - BlockItemsX = block_task_policy_t::BlockItemsX, - - /// Extent of block-wide A|B tiles in value_t along the K-axis - BlockItemsK = block_task_policy_t::BlockItemsK, - - /// Extent of warp C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - WarpItemsY = block_task_policy_t::WarpItemsY, - - /// Extent of warp C-tile in accum_t (and B-tiles in value_t) along N-axis (width) - WarpItemsX = block_task_policy_t::WarpItemsX, - - /// Extent of warp C-tile in accum_t (and A-tiles in value_t) along M-axis (height) - WmmaItemsY = block_task_policy_t::WmmaItemsY, - - /// Extent of warp C-tile in accum_t (and B-tiles in value_t) along N-axis 
(width) - WmmaItemsX = block_task_policy_t::WmmaItemsX, - - /// Extent of warp-wide A|B-tiles in value_t along K-axis - WmmaItemsK = block_task_policy_t::WmmaItemsK, - - /// Whether to halve synchronization overhead at the expense of doubled shared memory and addressing overhead - UseDoubleScratchTiles = block_task_policy_t::UseDoubleScratchTiles, - - /// Number of threads in warp - WarpThreads = block_task_policy_t::WarpThreads, - - /// Number of warps participating - BlockWarps = block_task_policy_t::BlockWarps, - - /// Extent of block in warps along M-axis - BlockWarpsY = divide_assert::value, - - /// Extent of block in warps along N-axis - BlockWarpsX = divide_assert::value, - - /// Number of MMA unrolls - WmmaUnrollCount = divide_assert::value, - - /// True if the A matrix layout is column major (K is the strided dimension) - IsLayoutCongruousA = (TransformA == matrix_transform_t::NonTranspose), - - /// True if the B matrix layout is row mayor (K is the strided dimension) - IsLayoutCongruousB = (TransformB == matrix_transform_t::Transpose), - - }; - - /// WMMA may support unique types for A and B, so plan ahead for this - typedef value_t value_a_t; - - /// WMMA may support unique types for A and B, so plan ahead for this - typedef value_t value_b_t; - - /// WMMA accumulator type - typedef wmma_accumulator< - WarpItemsY, - WarpItemsX, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_a_t, - value_b_t, - accum_t, - TransformA, - TransformB> - accumulator_t; - - /// Thread block rasterization helper type - typedef grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - /// Tile loader type for matrix A - typedef block_loader_wmma< - IsLayoutCongruousA, - BlockThreads, - (IsLayoutCongruousA ? BlockItemsY : BlockItemsK), - (IsLayoutCongruousA ? 
BlockItemsK : BlockItemsY), - value_a_t, - LdgAlignA, - AllowRaggedTiles> - block_loader_a_t; - - /// Tile loader type for matrix A - typedef block_loader_wmma< - IsLayoutCongruousB, - BlockThreads, - (IsLayoutCongruousB ? BlockItemsX : BlockItemsK), - (IsLayoutCongruousB ? BlockItemsK : BlockItemsX), - value_b_t, - LdgAlignB, - AllowRaggedTiles> - block_loader_b_t; - - /// Type alias for matrix A fragment type - typedef typename accumulator_t::fragment_a_t fragment_a_t; - - /// Type alias for matrix B fragment type - typedef typename accumulator_t::fragment_b_t fragment_b_t; - - enum - { - /// Number of fragments from A matrix - WmmaBlocksY = accumulator_t::WmmaBlocksY, - - /// Number of fragments from B matrix - WmmaBlocksX = accumulator_t::WmmaBlocksX, - - /// Number of value_t to pad the outer dimension of the shared A-tile - PadItemsA = 16, - - /// Number of value_t to pad the outer dimension of the shared B-tile - PadItemsB = 16, - - /// Leading dimension of A matrix tile - LdmSmemA = (IsLayoutCongruousA ? BlockItemsY: BlockItemsK) + PadItemsA, - - /// Leading dimension of A matrix tile - StridedSmemA = (IsLayoutCongruousA ? BlockItemsK : BlockItemsY ), - - /// Leading dimension of B matrix tile - LdmSmemB = (IsLayoutCongruousB? BlockItemsX : BlockItemsK) + PadItemsB, - - StridedSmemB = (IsLayoutCongruousB ? BlockItemsK : BlockItemsX), - }; - - /// Shared memory layout for a prefetch page - struct page_storage_t - { - /// Tile of A - value_a_t __align__(16) block_a[StridedSmemA][LdmSmemA]; - - /// Tile of B - value_b_t __align__(16) block_b[StridedSmemB][LdmSmemB]; - }; - - /// Shared memory layout for scratch storage - struct scratch_storage_t - { - union - { - /// Prefetch pages - uninitialized pages[UseDoubleScratchTiles ? 
2 : 1]; - - /// Scratch storage for warps - accum_t epilogue[BlockWarps][WmmaItemsX * WmmaItemsY]; - }; - }; - - //------------------------------------------------------------------------- - // Assert assumptions - //------------------------------------------------------------------------- - - // Ensure we have at least two unrolled innermost loop iterations (one to prefetch - // the next global tile and then one to prefetch the first strip of it from shared) - static_assert ((BlockItemsK >= 2), "BlockItemsK must be >= 2."); - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Scratch storage reference - scratch_storage_t *scratch; - - /// Which page of scratch tiles we're currently reading from - int page_idx; - - /// Pointer to matrix C - accum_t *d_c; - - /// Epilogue operation applied to update matrix C - epilogue_op_t epilogue_op; - - /// Matrix height in rows of trans_op(A) and C - int dim_m; - - /// Matrix width in columns of trans_op(B) and C - int dim_n; - - /// Control for inter-block k-splitting - k_split_control k_split; - - /// Thread block's base value_t coordinates (m, n) in matrix C - grid_raster_t grid_raster; - - /// Thread block's current coordinate (k) within A|B matrices - int block_item_coords_k; - - /// Thread block's ending coordinate (k) within A|B matrices (one-past) - int block_end_item_k; - - /// Warp's coordinates (x, y) in thread block - int2 block_warp_item_coords; - - /// A tile loader - block_loader_a_t loader_a; - - /// B tile loader - block_loader_b_t loader_b; - - /// Thread's active-k/prefetch-k slices from shared A tile - fragment_a_t local_slices_a[2][WmmaBlocksY]; - - /// Thread's active-k/prefetch-k slices from shared B tile - fragment_b_t local_slices_b[2][WmmaBlocksX]; - - /// Accumulator tile - accumulator_t accumulator; - - - 
//------------------------------------------------------------------------- - // Coordinate system helpers - //------------------------------------------------------------------------- - - /// Compute the warp's item-coordinates (x, y) in thread block - inline __device__ - int2 warp_item_coords() - { - int warp_id = threadIdx.x / WarpThreads; - - return make_int2( - (warp_id / BlockWarpsY) * WarpItemsX, - (warp_id % BlockWarpsY) * WarpItemsY); - } - - /// Compute the thread block's base item-coordinates in matrix A - inline __device__ - int2 a_block_item_coords() - { - if (TransformA == matrix_transform_t::NonTranspose) - { - return make_int2(grid_raster.block_item_coords.y, block_item_coords_k); - } - else - { - return make_int2(block_item_coords_k, grid_raster.block_item_coords.y); - } - } - - /// Compute the thread block's base item-coordinates in matrix B - inline __device__ - int2 b_block_item_coords() - { - if (TransformB == matrix_transform_t::Transpose) - { - return make_int2(grid_raster.block_item_coords.x, block_item_coords_k); - } - else - { - return make_int2(block_item_coords_k, grid_raster.block_item_coords.x); - } - } - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - block_task_wmma( - scratch_storage_t *scratch, - value_t *d_a, - value_t *d_b, - accum_t *d_c, - epilogue_op_t epilogue_op, - int dim_m, - int dim_n, - int dim_k, - k_split_control k_split) - : - scratch(scratch), - page_idx(0), - d_c(d_c), - epilogue_op(epilogue_op), - dim_m(dim_m), - dim_n(dim_n), - k_split(k_split), - block_item_coords_k(k_split.block_begin_item_k()), - block_end_item_k(k_split.block_end_item_k(dim_k)), - block_warp_item_coords(warp_item_coords()), - - loader_a( - reinterpret_cast(d_a), - (IsLayoutCongruousA ? dim_m : block_end_item_k), - (IsLayoutCongruousA ? 
0 : block_item_coords_k), - (IsLayoutCongruousA ? block_end_item_k : dim_m), - (IsLayoutCongruousA ? dim_m : dim_k), - (IsLayoutCongruousA ? block_item_coords_k : 0), - a_block_item_coords()), - - loader_b( - reinterpret_cast(d_b), - (IsLayoutCongruousB ? dim_n : block_end_item_k), - (IsLayoutCongruousB ? 0 : block_item_coords_k), - (IsLayoutCongruousB ? block_end_item_k : dim_n), - (IsLayoutCongruousB ? dim_n : dim_k), - (IsLayoutCongruousB ? block_item_coords_k : 0), - b_block_item_coords()) - {} - - - //------------------------------------------------------------------------- - // Prefetching utility methods - //------------------------------------------------------------------------- - - /** - * Request the calling thread's slices of the shared tiles at depth \p tile_offset_k - */ - inline __device__ void request_local_prefetch( - fragment_a_t local_slices_a[WmmaBlocksY], ///< Slice from A - fragment_b_t local_slices_b[WmmaBlocksX], ///< Slice from B - int tile_offset_k) - { - value_b_t const *smem_A_base = &scratch->pages[page_idx].alias().block_a[0][0]; - value_b_t const *smem_B_base = &scratch->pages[page_idx].alias().block_b[0][0]; - - int constexpr kstride_a = (IsLayoutCongruousA ? LdmSmemA : 1); - int constexpr lstride_a = (IsLayoutCongruousA ? 1 : LdmSmemA); - - int constexpr kstride_b = (IsLayoutCongruousB ? LdmSmemB : 1); - int constexpr lstride_b = (IsLayoutCongruousB ? 
1 : LdmSmemB); - - // Load B strip - #pragma unroll - for (int i = 0; i < WmmaBlocksX; ++i) - { - value_b_t const *smem_B_ptr = - &smem_B_base[tile_offset_k * kstride_b + (block_warp_item_coords.x + WmmaItemsX * i) * lstride_b]; - - nvcuda::wmma::load_matrix_sync(local_slices_b[i], smem_B_ptr, LdmSmemB); - } - - // Load A strip - #pragma unroll - for (int i = 0; i < WmmaBlocksY; ++i) - { - value_a_t const *smem_A_ptr = - &smem_A_base[tile_offset_k * kstride_a + (block_warp_item_coords.y + WmmaItemsY * i) * lstride_a]; - - nvcuda::wmma::load_matrix_sync(local_slices_a[i], smem_A_ptr, LdmSmemA); - } - } - - - //------------------------------------------------------------------------- - // Epilogue - //------------------------------------------------------------------------- - - /** - * Performs the GEMM epilogue: - * - Applies the scalar multipliers and addends to the accumulators - * - Write the result to the output matrix - */ - inline __device__ void epilogue() - { - // Wait for predecessor thread block(s) to produce partial-sums - k_split.wait(); - - // Configure epilogue as to whether the thread block is a secondary - // accumulator in an inter-block k-splitting scheme - if (k_split.is_secondary_accumulator()) - epilogue_op.set_secondary_accumulator(); - - // Whether or not the addend from C needs loading - bool must_init_addend = epilogue_op.must_init_addend(); - - int warp_base_x = grid_raster.block_item_coords.x + block_warp_item_coords.x; - int warp_base_y = grid_raster.block_item_coords.y + block_warp_item_coords.y; - - int constexpr SmemStride = WmmaItemsY; - - int warp_id = threadIdx.x / 32; - - // Compute shape of one accumulator read/modify/write operation - int constexpr ItemsY = (WmmaItemsY); - int constexpr ItemsX = (32 / ItemsY); - int constexpr IterationsX = WmmaItemsX / ItemsX; - - // Compute a rasterization of warp lanes across the WMMA tile. 
- int lane_id = (threadIdx.x % 32); - int lane_read_x = (lane_id / ItemsY); - int lane_read_y = (lane_id % ItemsY); - - accum_t *smem_scratch = scratch->epilogue[warp_id]; - accum_t const *smem_read_ptr = smem_scratch + lane_read_y + lane_read_x * SmemStride; - - #pragma unroll - for (int xb = 0; xb < WmmaBlocksX; ++xb) - { - #pragma unroll - for (int yb = 0; yb < WmmaBlocksY; ++yb) - { - // Store accumulator tile to SMEM - nvcuda::wmma::store_matrix_sync( - smem_scratch, - accumulator.accumulators[xb][yb], - SmemStride, - matrix_layout::kind); - - // Synchronize threads within the warp - __syncthreads(); - - // Compute lane coordinates so that each thread efficiently accesses SMEM. - int c_x = (warp_base_x + (xb) * WmmaItemsX + lane_read_x); - int c_y = (warp_base_y + (yb) * WmmaItemsY + lane_read_y); - - // Compute guard predicate by comparing against problem dimensions. - bool pred = c_y < dim_m; - - // Compute output pointer from lane coordinates - int c_index = c_x * dim_m + c_y; - accum_t *c_ptr = reinterpret_cast(d_c) + c_x * dim_m + c_y; - - // Iterate over columns of output tile. Load from SMEM, compute epilogue operation, - // and stream output to global memory - #pragma unroll - for (int item_x = 0; item_x < IterationsX; ++item_x) - { - accum_t accum = smem_read_ptr[item_x * ItemsX * SmemStride]; - accum_t c_element = 0; - - // Filter against problem dimensions as the warp iterates across the columns of - // output. - pred = (pred && ((c_x + item_x * ItemsX) < dim_n)); - - if (must_init_addend && pred) - { - // NB: inline PTX to utilize strong operations for inter-block synchronization. - // The following is equivalent to: - // - // c_element = c_ptr[0]; - asm volatile ("ld.global.cg.f32 %0, [%1];\n" : "=f"(c_element) : "l"(c_ptr)); - } - - c_element = epilogue_op(accum, c_element, c_index); - - if (pred) - { - // NB: inline PTX to utilize strong operations for inter-block synchronization. 
- // The following is equivalent to: - // - // c_ptr[0] = c_element; - - asm volatile ("st.global.cg.f32 [%0], %1;\n" : : "l"(c_ptr), "f"(c_element)); - } - - // Increment output pointer - c_ptr += dim_m * ItemsX; - c_index += dim_m * ItemsX; - } - __syncthreads(); - } - } - - // Signal k-split successor thread_block - k_split.signal(); - } - - //------------------------------------------------------------------------- - // Tile consumption - //------------------------------------------------------------------------- - - /** - * Consume a tile of A and B each - */ - template - inline __device__ - void consume_tile() - { - // Request global prefetch for next tile on first strip - if (DoGlobalPrefetch) - { - loader_b.request(); - loader_b.next(); - loader_a.request(); - loader_a.next(); - } - - // Unroll BlockDpVectorsK iterations of outer-product accumulations - #pragma unroll - for (int iteration = 0; iteration < WmmaUnrollCount; ++iteration) - { - int tile_offset_k = iteration * WmmaItemsK; - - // Active load-from-shared index - int active_lds_idx = __NV_STD_MIN(WmmaUnrollCount - 1, (iteration) % 2); - - // Next load-from-shared index - int next_lds_idx = __NV_STD_MIN(WmmaUnrollCount - 1, (iteration + 1) % 2); - - // The last unrolled iteration commits the global fetches - if ((iteration == WmmaUnrollCount - 1) && DoGlobalPrefetch) - { - // If not using two pages of scratch tiles, protect the above prefetch loads from - // the committing writes below - if (!UseDoubleScratchTiles) - { - __syncthreads(); - } - else - { - page_idx = (page_idx ? 
0 : 1); - } - - // Commit global prefetch data to scratch page - loader_a.template commit(&scratch->pages[page_idx].alias().block_a[0][0]); - loader_b.template commit(&scratch->pages[page_idx].alias().block_b[0][0]); - - __syncthreads(); - } - - // Accumulate this dp-stripe product - accumulator.multiply_accumulate( - local_slices_a[active_lds_idx], - local_slices_b[active_lds_idx]); - - // Request local prefetch for next strip - request_local_prefetch( - local_slices_a[next_lds_idx], - local_slices_b[next_lds_idx], - (tile_offset_k + WmmaItemsK) % BlockItemsK); - } - } - - //------------------------------------------------------------------------- - // GEMM API - //------------------------------------------------------------------------- - - /** - * Compute GEMM - */ - inline __device__ - void run() - { - // Quit if the thread block is fully out-of-bounds - if (grid_raster.is_block_oob(dim_m, dim_n)) - { - asm volatile("exit;"); - } - - // Request global prefetch of first tile - loader_a.request(); - loader_a.next(); - loader_b.request(); - loader_b.next(); - - // Commit global prefetch of first tile to shared memory - loader_a.template commit(&scratch->pages[page_idx].alias().block_a[0][0]); - loader_b.template commit(&scratch->pages[page_idx].alias().block_b[0][0]); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - - // Synchronize shared tiles and prepared accumulator - __syncthreads(); - - // Initialize thread's slice of accumulators - accumulator.init(); - - // Request first iteration of local prefetch strips - request_local_prefetch( - local_slices_a[0], - local_slices_b[0], - 0); - - // - // Main loop - // - - // Consume tiles in A and B along the K-axis (all but last tile) - #pragma unroll 1 - while (block_item_coords_k < block_end_item_k) - { - consume_tile(); - - // Advance to next A,B tiles in K-axis - block_item_coords_k += BlockItemsK; - } - - consume_tile(); - - // - // Eplilogue - // - - // prevent overwriting SMEM 
until all warps have finished loading data - __syncthreads(); - - // store accumulator tile to global memory - epilogue(); - } -}; - -} // namespace gemm -} // namespace cutlass - -#endif diff --git a/cutlass/gemm/clear_accumulators.h b/cutlass/gemm/clear_accumulators.h new file mode 100644 index 0000000000..12e1f57909 --- /dev/null +++ b/cutlass/gemm/clear_accumulators.h @@ -0,0 +1,55 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines abstractions for efficiently clearing accumulator tiles. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ClearAccumulators { + /// The shared storage. + struct SharedStorage {}; + + /// Ctor. + CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {} + + /// Clear the fragment. + template + CUTLASS_DEVICE void clear(Fragment_& fragment) { + fragment.clear(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/dgemm_traits.h b/cutlass/gemm/dgemm_traits.h new file mode 100644 index 0000000000..0bbc2210bc --- /dev/null +++ b/cutlass/gemm/dgemm_traits.h @@ -0,0 +1,127 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural traits of double-precision GEMM. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 1> +struct DgemmConfig + : public GemmConfig< + /// The scalar type for A. 
+ double, + /// The scalar type for B. + double, + /// The scalar type for C. + double, + /// The scalar type for D. + double, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, double, double, double>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 2, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 2, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 2, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 64, 128>, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// The number of doubles loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of doubles loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The DGEMM config. + typename GemmConfig_ = + DgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct DgemmTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. 
+ Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/dispatch.h b/cutlass/gemm/dispatch.h deleted file mode 100644 index ae1819a62c..0000000000 --- a/cutlass/gemm/dispatch.h +++ /dev/null @@ -1,542 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * GEMM kernel entrypoint and dispatch stub - */ - -#include - -#include "../util/util.h" -#include "block_task.h" -#include "block_task_wmma.h" -#include "grid_raster.h" -#include "dispatch_policies.h" -#include "k_split_control.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * param_pack - ******************************************************************************/ - -/** - * Parameter-pack structure - * - * Kernel launch latency is reduced when kernel arguments are wrapped into - * a single parameter - */ -template < - typename value_t, - typename accum_t, - typename epilogue_op_t> -struct param_pack -{ - int m; ///< Height in rows of op(A) and C - int n; ///< Width in columns of op(B) and C - int k; ///< Width in columns of op(A) and height in rows of op(B) - k_split_control k_split; ///< Abstraction for controlling inter-block k-splitting - value_t *d_a; ///< Pointer to matrix A array values - value_t *d_b; ///< Pointer to matrix B array values - accum_t *d_c; ///< Pointer to matrix C array values - epilogue_op_t epilogue_op; - - param_pack( - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - k_split_control k_split, ///< Abstraction for controlling inter-block k-splitting - epilogue_op_t op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Pointer to matrix A array values - value_t *d_b, ///< Pointer to matrix B array values - accum_t *d_c) ///< Pointer to matrix C array values - : - m(m), - n(n), - k(k), - k_split(k_split), - epilogue_op(op), - d_a(d_a), - d_b(d_b), - d_c(d_c) - {} - -}; - - -/****************************************************************************** - * Conditionally select the appropriate GEMM threadblock task - 
******************************************************************************/ - -/// Conditional selection for block task -template < - math_operation_class_t math_op, ///< - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task; - -/// Scalar math operations -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task< - math_operation_class_t::scalar, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles -> -{ - // Parameterize task type - typedef block_task< - block_task_policy_t, - value_t, - accum_t, - 
TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> type; -}; - -/// Matrix math operations -template < - typename block_task_policy_t, ///< Parameterization of block_task_policy - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - int LdgAlignA, ///< Alignment (in bytes) for A operand - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - int LdgAlignB, ///< Alignment (in bytes) for B operand - typename epilogue_op_t, ///< Epilogue operation applied to GEMM - int LdgAlignC, ///< Alignment (in bytes) for C operand - bool AllowRaggedTiles ///< Whether GEMM supports matrix sizes other than multiple of BlockItems{XY} -> -struct gemm_block_task< - math_operation_class_t::matrix, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> -{ - -#if defined(WMMA) // conditional compilation with WMMA headers - - // Parameterize task type - typedef block_task_wmma< - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles> type; - -#endif -}; - -/****************************************************************************** - * GEMM kernel entrypoint - ******************************************************************************/ - -/** - * GEMM kernel - * - * NB: Not sure why NVVM is doing stuff with "__launch_bounds__" instead of just - * passing it along to PTXAS, but it is currently resulting in less optimal codegen - */ -template < - math_operation_class_t math_op, ///< Indicates which class of math operation to select - typename block_task_policy_t, ///< Parameterization of block_task_policy - matrix_transform_t::kind_t TransformA, ///< 
Transformation op for matrix A - int LdgAlignA, ///< Alignment of A matrix elements in bytes - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment of B matrix elements in bytes - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename epilogue_op_t, ///< Epilogue operation applied to update matrix C - int LdgAlignC, ///< Alignment of C elements in bytes - bool AllowRaggedTiles> ///< Boolean to indicate whether AllowRaggedTiles handling is enabled -__global__ void kernel(param_pack pack) -{ - // Parameterize task type - typedef typename gemm_block_task< - math_op, - block_task_policy_t, - value_t, - accum_t, - TransformA, - LdgAlignA, - TransformB, - LdgAlignB, - epilogue_op_t, - LdgAlignC, - AllowRaggedTiles>::type block_task_t; - - // Declare statically-allocated shared storage - __shared__ typename block_task_t::scratch_storage_t smem; - - // Construct and run the task - block_task_t( - &smem, - pack.d_a, - pack.d_b, - pack.d_c, - pack.epilogue_op, - pack.m, - pack.n, - pack.k, - pack.k_split).run(); -} - - -/****************************************************************************** - * Launch configuration description returned to the caller - ******************************************************************************/ - -/// Return details about the launch configuration to the caller -struct launch_configuration -{ - // - // Data members - // - - /// cudaError_t resulting from grid launch - cudaError_t result; - - /// Extent of a thread block's partition along the GEMM K-axis - int split_k; - - /// Kernel grid extents in thread blocks - dim3 grid; - - /// Thread block extents in threads - dim3 block; - - // - // Methods - // - - /// Constructor - launch_configuration(): - result(cudaSuccess), - split_k(0), - grid(0, 0, 0), - block(0, 0, 0) { - - } - - /// Conversion from cudaError_t - 
launch_configuration(cudaError_t result): - result(result), - split_k(1), - grid(0, 0, 0), - block(0, 0, 0) { - - } - - /// Launch configuration for Cutlass kernels - launch_configuration( - cudaError_t result, - int split_k, - dim3 grid, - dim3 block - ): - result(result), - split_k(split_k), - grid(grid), - block(block) { - - } -}; - - -/****************************************************************************** - * Dispatch stub - ******************************************************************************/ - -/** - * GEMM dispatch stub - * - * This function also serves as the autotuning entrypoint to evaluate different - * tuning parameterizations of kernel. - */ -template < - math_operation_class_t math_op, ///< Indicates which class of math operation to select - typename block_task_policy_t, ///< Parameterization of block_task_policy - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - int LdgAlignA, ///< Alignment of A matrix elements in bytes - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment of B matrix elements in bytes - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename epilogue_op_t, ///< Epilogue operation - int LdgAlignC, ///< Alignment of C matrix elements in bytes - bool AllowRaggedTiles, ///< Boolean to indicate whether AllowRaggedTiles handling is enabled - typename kernel_ptr_t> ///< GEMM kernel function pointer type -launch_configuration dispatch( - kernel_ptr_t kernel_ptr, ///< GEMM kernel function pointer - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - epilogue_op_t epilogue_op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Device pointer to matrix A array values - value_t *d_b, ///< Device pointer to matrix B array values - accum_t 
*d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = true) ///< Whether or not to synchronize the stream after every kernel launch - /// to check for errors. Also causes launch configurations to be printed - /// to the console if DEBUG is defined. Default is \p false. -{ - // Thread block rasterization type - typedef grid_raster< - block_task_policy_t::BlockItemsY, - block_task_policy_t::BlockItemsX, - TransformA, - TransformB, - block_task_policy_t::RasterStrategy> - grid_raster_t; - - launch_configuration config; - - // Compute block dims - config.block = dim3(block_task_policy_t::BlockThreads); - - // Compute shared memory - int dynamic_smem_bytes = 0; - - // Compute occupancy - int max_sm_occupancy; - if (CUDA_PERROR_DEBUG(config.result = cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &max_sm_occupancy, - kernel_ptr, - config.block.x * config.block.y, - dynamic_smem_bytes))) - { - return config; - } - - // Compute grid extents - config.grid = grid_raster_t::grid_dims(m, n); - - // Get SM count - int sm_count; - if (CUDA_PERROR_DEBUG(config.result = get_sm_count(sm_count))) - return config; - - // Get k-split flag storage (TODO: make a pool) - int *d_flags; - if (CUDA_PERROR_DEBUG(config.result = cudaGetSymbolAddress((void**) &d_flags, d_flags_split_k))) - return config; - - // Construct k-split coordinator - k_split_control k_split( - d_flags, - sm_count, - max_sm_occupancy, - k, - block_task_policy_t::BlockItemsK, - config.block, - config.grid); // in,out - - config.split_k = k_split.split_k; - - // Log kernel configuration - if (debug_synchronous) - { - // Compute tiling efficiency - float block_tiling_efficiency = float(block_task_policy_t::BlockItemsY * block_task_policy_t::BlockItemsX) / - float(block_task_policy_t::BlockItemsY + block_task_policy_t::BlockItemsX); - - float tiling_efficiency = block_tiling_efficiency; - - float 
wave_efficiency = k_split.get_wave_efficiency( - sm_count, max_sm_occupancy, config.block, config.grid); - - CUDA_LOG_DEBUG("Final wave_efficiency %.4f, tiling_efficiency %.4f\n", - wave_efficiency, tiling_efficiency); - - CUDA_LOG_DEBUG("Invoking kernel<<<(%d, %d, %d), (%d.y,%d.x), %d, %lld>>>(), %d SM occupancy, %d split_k\n", - config.grid.x, config.grid.y, config.grid.z, - config.block.y, config.block.x, - dynamic_smem_bytes, - (long long) stream, - max_sm_occupancy, - k_split.split_k); - } - - // Construct parameter-pack - param_pack pack( - m, - n, - k, - k_split, - epilogue_op, - d_a, - d_b, - d_c); - - // Prepare k-split coordinator - if (CUDA_PERROR_DEBUG(config.result = k_split.prepare(stream, debug_synchronous))) - { - return config; - } - - // Invoke kernel - kernel_ptr<<< config.grid, config.block, dynamic_smem_bytes, stream >>>(pack); - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(config.result = cudaPeekAtLastError())) - return config; - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(config.result = cudaStreamSynchronize(stream)))) - return config; - - return config; -} - - -/****************************************************************************** - * GEMM - ******************************************************************************/ - -/** - * Computes gemm on device matrices - */ -template < - tiling_strategy::kind_t TilingStrategy, ///< Tile-sizing classification - math_operation_class_t math_op, ///< Indicates which class of math operation to select - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - int LdgAlignA, ///< Alignment (in bytes) of A operand - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - int LdgAlignB, ///< Alignment (in bytes) of B operand - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t, ///< Accumulator value type (matrix C and scalars) - typename 
epilogue_op_t, ///< Epilogue operation to update matrix C - int LdgAlignC> ///< Alignment (in bytes) of C operand -launch_configuration device_gemm( - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - epilogue_op_t epilogue_op, ///< Epilogue operation to update matrix C - value_t *d_a, ///< Device pointer to matrix A array values - value_t *d_b, ///< Device pointer to matrix B array values - accum_t *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to - /// check for errors. Also causes launch configurations to be printed to - /// the console if DEBUG is defined. Default is \p false. -{ - // Parameterize an task policy type - // (TODO: use a policy dispatch mechanism based upon SM version) - typedef gemm_policy block_task_policy_t; - - // AllowRaggedTiles-tile check - if ((m % block_task_policy_t::BlockItemsY != 0) || - (n % block_task_policy_t::BlockItemsX != 0) || - (k % block_task_policy_t::BlockItemsK != 0)) - { - // Needs ragged tile-handling - static const bool AllowRaggedTiles = true; - - return dispatch( - kernel, - m, - n, - k, - epilogue_op, - d_a, - d_b, - d_c, - stream, - debug_synchronous); - } - else - { - // Does not need ragged tile-handling - static const bool AllowRaggedTiles = false; - - return dispatch( - kernel, - m, - n, - k, - epilogue_op, - d_a, - d_b, - d_c, - stream, - debug_synchronous); - } - - -} - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/dispatch_policies.h b/cutlass/gemm/dispatch_policies.h deleted file mode 100644 index 298f7be559..0000000000 --- a/cutlass/gemm/dispatch_policies.h +++ /dev/null @@ -1,661 +0,0 @@ -/****************************************************************************** - * 
Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Architecture-specific GEMM block_task policies - */ - -#include - -#include "../util/util.h" -#include "block_task.h" -#include "grid_raster.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * tiling_strategy - ******************************************************************************/ - -/** - * Enumeration of tile-sizing granularities - */ -struct tiling_strategy : printable_t -{ - /// \brief Enumerants - enum kind_t - { - Unknown, - Small, - Medium, - Large, - Tall, - Wide, - Huge, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - tiling_strategy() : kind(Unknown) {} - - /// Copy constructor - tiling_strategy(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case Small: return "small"; - case Medium: return "medium"; - case Large: return "large"; - case Tall: return "tall"; - case Wide: return "wide"; - case Huge: return "huge"; - case Unknown: - default: return "unknown"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } -}; - - -/****************************************************************************** - * GEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for sgemm - */ -template < - typename value_t, - typename accum_t, - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - tiling_strategy::kind_t TilingStrategy> ///< Tile-sizing classification -struct gemm_policy; - - 
-/****************************************************************************** - * SGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 2, // _ThreadItemsY - 2, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // 
_UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for wide sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge sgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * DGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 2, // _ThreadItemsY - 2, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // 
_BlockItemsX - 16, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for wide dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge dgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // 
_UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * HGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Small> : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for medium hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Medium> : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 16, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Large> : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for tall hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< 
Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Tall> : - block_task_policy< - 128, // _BlockItemsY - 32, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for wide hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Wide> : - block_task_policy< - 32, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge hgemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy<__half, __half, TransformA, TransformB, tiling_strategy::Huge> : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 8, // _BlockItemsK - 16, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * IGEMM - ******************************************************************************/ - -/** - * GEMM task policy specialization for small igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 16, // _BlockItemsY - 32, // _BlockItemsX - 32, // 
_BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/** - * GEMM task policy specialization for medium igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 32, // _BlockItemsK - 4, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 32, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 64, // _BlockItemsX - 64, // _BlockItemsK - 8, // _ThreadItemsY - 4, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for large igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 4, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - 
grid_raster_strategy::Default> // _RasterStrategy -{}; - -/** - * GEMM task policy specialization for huge igemm - */ -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - block_task_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 32, // _BlockItemsK - 8, // _ThreadItemsY - 8, // _ThreadItemsX - false, // _UseDoubleScratchTiles - grid_raster_strategy::Default> // _RasterStrategy -{}; - - -/****************************************************************************** - * WMMA GEMM - ******************************************************************************/ - -// WMMA is a preview feature in CUDA. Conditionally enable wmma_gemm policies. -#if defined(WMMA) - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - gemm::block_task_wmma_policy< - 16, // _BlockItemsY - 16, // _BlockItemsX - 16, // _BlockItemsK - 16, // _WarpItemsY - 16, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy : - gemm::block_task_wmma_policy< - 32, // _BlockItemsY - 32, // _BlockItemsX - 32, // _BlockItemsK - 32, // _WarpItemsY - 32, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, 
TransformA, TransformB, tiling_strategy::Large> : - gemm::block_task_wmma_policy< - 64, // _BlockItemsY - 64, // _BlockItemsX - 32, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Tall> : - gemm::block_task_wmma_policy< - 128, // _BlockItemsY - 64, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Wide> : - gemm::block_task_wmma_policy< - 64, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // _RasterStrategy -{}; - -template < - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB> ///< Transformation op for matrix B -struct gemm_policy< half, float, TransformA, TransformB, tiling_strategy::Huge> : - gemm::block_task_wmma_policy< - 128, // _BlockItemsY - 128, // _BlockItemsX - 64, // _BlockItemsK - 32, // _WarpItemsY - 64, // _WarpItemsX - 16, // _WmmaItemsY - 16, // _WmmaItemsX - 16, // _WmmaItemsK - false, // _UseDoubleScratchTiles - gemm::grid_raster_strategy::Default> // 
_RasterStrategy -{}; - -#endif - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/dp_accummulate.h b/cutlass/gemm/dp_accummulate.h deleted file mode 100644 index 6b5d4b6c92..0000000000 --- a/cutlass/gemm/dp_accummulate.h +++ /dev/null @@ -1,223 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for exposing architecture-specific "dot-product-accumulate" - * ISA operations - */ - -#include - -#include "../util/util.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * dp_accummulate - ******************************************************************************/ - - -/** - * \brief Abstraction for exposing architecture-specific "dot-product-accumulate" - * ISA operations - * - * Given two K-component vectors a and b having type value_t[K] and an addend c - * of type accum_t, the "dot-product-accumulate" of type accum_t is computed - * as d = x[0]*y[0] + x[1]*y[1] + ... + x[K-1]*y[K-1] + c. - * - * We use the notation "dpK" to connote a K-component dot-product-accumulate. - * For example, "dp1" is a simple multiply-add. - * - * For given pairing of value_t and accum_t types, the corresponding - * dp_accummulate class will: - * - * - Define the member-type dp_vector_t as the appropriate K-component vector - * type needed to leverage architecture-specific "dot-product accumulate" - * ISA operations. - * - Implement the corresponding dot-product operation between two dp_vector_t - * inputs a and b. 
- * - */ -template < - typename value_t, ///< Component value type - typename accum_t> ///< Accumulator value type -struct dp_accummulate; - - - -/// Default "dp1" dot-product-accumulate traits specialization for value_t->accum_t -template < - typename value_t, ///< Component value type - typename accum_t> ///< Accumulator value type -struct dp_accummulate -{ - /// Single-component "dp1" dot-product vector type - typedef value_t dp_vector_t; - - - /// Compute "dp1" float->float - inline __device__ - static void mad( - float &d, - const float &a, - const float &b, - const float &c) - { - asm volatile ( "fma.rn.f32 %0, %1, %2, %3;\n" - : "=f"(d) : "f"(a), "f"(b), "f"(c)); - } - - - /// Compute "dp1" double->double - inline __device__ - static void mad( - double &d, - const double &a, - const double &b, - const double &c) - { - asm volatile ("fma.rn.f64 %0, %1, %2, %3;\n" - : "=d"(d) : "d"(a), "d"(b), "d"(c)); - } - - - /// Compute "dp1" int16_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int16_t &a, - const int16_t &b, - const int32_t &c) - { - asm volatile ("mad.wide.s16 %0, %1, %2, %3;\n" - : "=r"(d) : "h"(a), "h"(b), "r"(c)); - } - - - /// Compute "dp1" uint16_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint16_t &a, - const uint16_t &b, - const uint32_t &c) - { - asm volatile ("mad.wide.u16 %0, %1, %2, %3;\n" - : "=r"(d) : "h"(a), "h"(b), "r"(c)); - } - - - /// Compute "dp1" int32_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int32_t &a, - const int32_t &b, - const int32_t &c) - { - asm volatile ("mad.lo.s32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - - - /// Compute "dp1" uint32_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - asm volatile ("mad.lo.u32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - -}; - - - -#if (CUTLASS_ARCH >= 610) // Specializations only 
enabled for Pascal SM610+ - - -/// "dp4" dot-product-accumulate traits specialization for int8_t->int32_t -template <> -struct dp_accummulate< - int8_t, ///< Component value type - int32_t> ///< Accumulator value type -{ - /// Four-component signed "idp4" - typedef int32_t dp_vector_t; - - /// Compute "dp4" int16_t->int32_t - inline __device__ - static void mad( - int32_t &d, - const int32_t &a, - const int32_t &b, - const int32_t &c) - { - asm volatile ( "dp4a.s32.s32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } -}; - - -/// "dp4" dot-product-accumulate traits specialization for uint8_t->uint32_t -template <> -struct dp_accummulate< - uint8_t, ///< Component value type - uint32_t> ///< Accumulator value type -{ - /// Four-component unsigned "idp4" - typedef uint32_t dp_vector_t; - - /// Compute "dp4" uint16_t->uint32_t - inline __device__ - static void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - asm volatile ( "dp4a.u32.u32 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } -}; - - -#endif // Specializations only enabled for Pascal SM610+ - - -} // namespace gemm -} // namespace cutlass - diff --git a/cutlass/gemm/epilogue_function.h b/cutlass/gemm/epilogue_function.h deleted file mode 100644 index 714dd81715..0000000000 --- a/cutlass/gemm/epilogue_function.h +++ /dev/null @@ -1,104 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Epilogue operation to compute final output - */ - -namespace cutlass { -namespace gemm { - - //// Used by GEMM to compute the final result C <= alpha * accumulator + beta * C - template < - typename accum_t, - typename output_t, - typename scalar_t - > - class blas_scaled_epilogue - { - public: - - scalar_t alpha; - scalar_t beta; - - inline __device__ __host__ - blas_scaled_epilogue( - scalar_t alpha, - scalar_t beta) - : - alpha(alpha), - beta(beta) - {} - - - /// Epilogue operator - inline __device__ __host__ - output_t operator()( - accum_t accumulator, - output_t c, - size_t idx) const - { - return output_t(alpha * scalar_t(accumulator) + beta * scalar_t(c)); - } - - - /// Epilogue operator - inline __device__ __host__ - output_t operator()( - accum_t accumulator, - size_t idx) const - { - return output_t(alpha * scalar_t(accumulator)); - } - - /** - * Configure epilogue as to whether the thread block is a secondary - * accumulator in an inter-block k-splitting scheme - */ - inline __device__ - void set_secondary_accumulator() - { - beta = scalar_t(1); - } - - - /// Return whether the beta-scaled addend needs initialization - inline __device__ - bool must_init_addend() - { - return (beta != scalar_t(0)); - } - }; - - - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/gemm.h b/cutlass/gemm/gemm.h new file mode 100644 index 0000000000..0ca093ff53 --- /dev/null +++ b/cutlass/gemm/gemm.h @@ -0,0 +1,319 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements a software-pipelined efficient GEMM. +*/ +#pragma once + +#if !defined(__CUDACC_RTC__) +#include +#endif + +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +__global__ void gemm_kernel(typename Gemm_::Params params) { + // Declare shared memory. 
+ __shared__ typename Gemm_::SharedStorage shared_storage; + + // Construct the GEMM object. + Gemm_ gemm(params, shared_storage); + // Run GEMM. + gemm.multiply_add(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmDesc { + /// The dimensions of the GEMM. + Index_ m, n, k; + /// The alpha/beta scaling values. + Scalar_ alpha, beta; + /// The source matrix A. + void const* d_a; + /// The stride for A. + Index_ lda; + /// The source matrix B. + void const* d_b; + /// The stride for B. + Index_ ldb; + /// The source matrix C. + void const* d_c; + /// The stride for C. + Index_ ldc; + /// The destination matrix D. + void* d_d; + /// The stride for D. + Index_ ldd; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Gemm { + /// This class. + typedef Gemm This_; + /// The traits. + typedef GemmTraits_ Traits; + /// The shared storage. + typedef typename Traits::SharedStorage SharedStorage; + + /// The scalar for A. + typedef typename Traits::ScalarA ScalarA; + /// The scalar for B. + typedef typename Traits::ScalarB ScalarB; + /// The scalar in the epilogue. + typedef typename Traits::Epilogue::Scalar ScalarEpilogue; + /// The scalar for C. + typedef typename Traits::Epilogue::ScalarC ScalarC; + /// The scalar for D. + typedef typename Traits::Epilogue::ScalarD ScalarD; + /// The index. + typedef typename Traits::Index Index; + + /// The number of threads. + static int const kThreads = Traits::GemmConfig::kThreads; + + /// The params. 
+ struct Params : public Traits::Params { + CUTLASS_HOST_DEVICE int initialize(Index m, + Index n, + Index k, + ScalarEpilogue alpha, + ScalarA const* d_a, + Index lda, + ScalarB const* d_b, + Index ldb, + ScalarEpilogue beta, + ScalarC const* d_c, + Index ldc, + ScalarD* d_d, + Index ldd) { + GemmDesc desc; + desc.m = m; + desc.n = n; + desc.k = k; + desc.alpha = alpha; + desc.beta = beta; + desc.d_a = reinterpret_cast(d_a); + desc.lda = lda; + desc.d_b = reinterpret_cast(d_b); + desc.ldb = ldb; + desc.d_c = reinterpret_cast(d_c); + desc.ldc = ldc; + desc.d_d = reinterpret_cast(d_d); + desc.ldd = ldd; + return Traits::Params::initialize(desc); + } + }; + +#if !defined(__CUDACC_RTC__) + /// Launch the kernel. + static __host__ cudaError_t launch(Params const& params, + cudaStream_t stream = cudaStreamDefault) { + // Setup the grid. + dim3 grid; + grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; + grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; + + // The number of threads. + dim3 block; + block.x = kThreads; + + // Launch the kernel. + void const* params_ = reinterpret_cast(¶ms); + + return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), + grid, + block, + const_cast(¶ms_), + 0, + stream); + } + + /// Launch the kernel. + static __host__ cudaError_t launch(CUfunction kernel, + Params const& params, + CUstream stream = CU_STREAM_LEGACY) { + // Setup the grid. + dim3 grid; + grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW; + grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH; + + // The number of threads. + dim3 block; + block.x = kThreads; + + // Launch the kernel. 
+ void* params_[] = {const_cast(reinterpret_cast(¶ms))}; + + // return cudaLaunchKernel(reinterpret_cast(&gemm_kernel), grid, block, + // const_cast(¶ms_), 0, stream); + CUresult result = cuLaunchKernel( + kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z, 0, stream, params_, 0); + + if (result != CUDA_SUCCESS) { + return cudaErrorLaunchFailure; + } + return cudaSuccess; + } + +#endif + + /// Ctor. + CUTLASS_DEVICE Gemm(Params const& params_, SharedStorage& shared_storage_) + : params(params_), shared_storage(shared_storage_) {} + + /// Do the GEMM. + CUTLASS_DEVICE void multiply_add() { + // Swizzle the IDs of the block (to enable better cache behavior). + typename Traits::BlockSwizzle block_swizzle; + dim3 block = block_swizzle.swizzle(); + + // Scale the id. + block.x *= Traits::OutputTile::kW; + block.y *= Traits::OutputTile::kH; + + // We may want to use shared memory to clear the registers. + typedef typename Traits::ClearAccumulators ClearAccumulators; + + // The streams to read A/B from global memory to shared memory. + typename Traits::GlobalLoadStream global_stream(params, shared_storage, block); + + // Create the accumulator clear. + ClearAccumulators clear(shared_storage.main_loop.clear); + + /// Define the mainloop iteration size + typedef typename Traits::MultiplyAdd MultiplyAdd; + + // By how much we unroll the main loop. + Index const kUnroll = static_cast(MultiplyAdd::AccumulatorsPerWarp::kD); + + // If we do not have enough steps in the main loop, trigger the residue code. + if (params.k < kUnroll) { + global_stream.residue(params.k, true); + } + + // Fetch the fragments for A and B from global memory. + global_stream.copy(); + + // Copy the elements to shared memory (after transformation if needed). + global_stream.commit(); + + // Make sure the data is in shared memory. + Traits::shared_store_fence(false); + + // The unrolling steps for the main loop. 
+ int const kUnrollingSteps = + MultiplyAdd::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD; + + // Make sure we have at least 2 unrolling steps or our pipelining is not going to work. + static_assert(kUnrollingSteps >= 2, "The pipelining assumes at least two steps"); + + // The stream of data from shared memory to fragments. + typename Traits::SharedLoadStream shared_load_stream(params, shared_storage); + + // Trigger the copy from shared memory for the 1st stream. + shared_load_stream.copy(0); + + // Allocate the accumulators. + typename MultiplyAdd::Accumulators accumulators; + // Clear the accumulators. + clear.clear(accumulators); + + // Enter the main loop and iterate. + typedef typename Traits::Index Index; + for (Index outer_k = params.k - kUnroll; outer_k > -kUnroll; outer_k -= kUnroll) { + // If that's the last "load iteration" update the predicates. + int const is_residue = outer_k <= kUnroll; + if (is_residue) { + global_stream.residue(outer_k); + } + + // Load data for the next iteration of the main loop. + global_stream.copy(); + + CUTLASS_PRAGMA_UNROLL + for (int step = 0; step < kUnrollingSteps - 1; ++step) { + // Trigger the copy from shared memory for the next A/B values. + shared_load_stream.copy(step + 1); + // Make sure the values are available for the current iteration to do the multiply-add. + shared_load_stream.commit(step); + + // Do the math on the fragments of the current iteration. + MultiplyAdd multiply_add; + multiply_add.multiply_add(shared_load_stream.fragment_a(step), + shared_load_stream.fragment_b(step), + accumulators, + accumulators); + } + + // Make sure the data from shared memory has been entirely consumed. + Traits::shared_load_fence(true); + + // Commit the data in shared memory for A/B. + global_stream.commit(); + + // Make sure the data is in shared memory. + Traits::shared_store_fence(true); + + // Move to the next stage for the load (if it makes sense).
+ shared_load_stream.inc_stage(); + // Trigger the copy from shared memory for the next loop iteration. + shared_load_stream.copy(0); + // Make sure the values are available for the current iteration to do the multiply-add. + shared_load_stream.commit(kUnrollingSteps - 1); + + // Do the math on the fragments of the current iteration. + MultiplyAdd multiply_add; + multiply_add.multiply_add(shared_load_stream.fragment_a(kUnrollingSteps - 1), + shared_load_stream.fragment_b(kUnrollingSteps - 1), + accumulators, + accumulators); + } + + // Epilogue. + typedef typename Traits::Epilogue Epilogue; + Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.m, params.n); + epilogue.epilogue(cutlass::make_Coord(0, block.y, block.x), accumulators); + } + + /// The params. + Params const& params; + /// The shared storage. + SharedStorage& shared_storage; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_epilogue.h b/cutlass/gemm/gemm_epilogue.h new file mode 100644 index 0000000000..de6513a404 --- /dev/null +++ b/cutlass/gemm/gemm_epilogue.h @@ -0,0 +1,225 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the epilogue phase of the GEMM kernel that efficiently updates global memory + with + the computed matrix product. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE bool is_zero(T x) { + return x == T(0); +} + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast(x) == int16_t(0); } +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmEpilogue { + /// The traits class. + typedef GemmEpilogueTraits_ Traits; + /// The params. + typedef typename Traits::Params Params; + /// The shared storage. 
+ typedef typename Traits::SharedStorage SharedStorage; + + /// The output tile. + typedef typename Traits::OutputTile OutputTile; + /// The number of iterations. + typedef typename Traits::Iterations Iterations; + /// The accumulators. + typedef typename Traits::Accumulators Accumulators; + /// The scalar. + typedef typename Traits::Scalar Scalar; + /// The functor in charge of the math. + typedef typename Traits::Functor Functor; + + /// We do not support 3D or 4D shapes. + static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes"); + + /// The iterator for C in global memory. + typedef typename Traits::GlobalLoadIteratorC GlobalLoadIteratorC; + /// The transformer for C. + typedef typename Traits::GlobalTransformerC GlobalTransformerC; + /// The transformer for D. + typedef typename Traits::GlobalTransformerD GlobalTransformerD; + /// The iterator for D in global memory. + typedef typename Traits::GlobalStoreIteratorD GlobalStoreIteratorD; + /// The iterator to store D in shared memory. + typedef typename Traits::SharedStoreIteratorD SharedStoreIteratorD; + /// The shared store transformer for D. + typedef typename Traits::SharedStoreTransformerD SharedStoreTransformerD; + /// The iterator to load D in shared memory. + typedef typename Traits::SharedLoadIteratorD SharedLoadIteratorD; + /// The shared load transformer for D. + typedef Copy SharedLoadTransformerD; + + /// The index. + typedef typename Traits::Index Index; + + /// The scalar for C. + typedef typename GlobalLoadIteratorC::Scalar ScalarC; + /// The scalar for D. + typedef typename GlobalStoreIteratorD::Scalar ScalarD; + + /// Ctor. + CUTLASS_DEVICE GemmEpilogue(Params const& params_, + SharedStorage& shared_storage_, + Index m_, + Index n_) + : params(params_), shared_storage(shared_storage_), m(m_), n(n_) {} + + /// Execute the epilogue. 
+ CUTLASS_DEVICE void epilogue(Coord<3> const& block, Accumulators& accumulators) { + if (is_zero(params.functor.beta)) { + epilogue_with_or_without_beta(block, accumulators); + } else { + epilogue_with_or_without_beta(block, accumulators); + } + } + + template + CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord<3> const& block, + Accumulators& accumulators) { + + Coord<3> const bounds = cutlass::make_Coord(0, n, m); + + // The functor. + Functor functor(params.functor); + // The C fragment. + typename GlobalLoadIteratorC::Fragment fragment_c; + // The transformed C fragment. + typename GlobalTransformerC::OutputFragment transformed_c; + + CUTLASS_PRAGMA_UNROLL + for (int h = 0; h < Iterations::kH; ++h) { + // Compute pointer and predicate offsets for C and D global iterators. + int const pointer_offset = + ((params.iterator_d.inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) + + params.iterator_d.inc_advance) * + Iterations::kW + + params.stride_h) * + h; + int const predicate_offset = + ((params.iterator_d.predicate_inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) + + params.iterator_d.predicate_inc_advance) * + Iterations::kW + + Traits::Delta::kH) * + h; + + // The iterator to load the elements of the C matrix. + GlobalLoadIteratorC global_load_iterator( + params.iterator_c, bounds, block, pointer_offset, predicate_offset); + // The transformer for C. + GlobalTransformerC transformer_c; + // The transformer for D. + GlobalTransformerD transformer_d; + // The iterator to store into the D matrix. + GlobalStoreIteratorD global_store_iterator( + params.iterator_d, bounds, block, pointer_offset, predicate_offset); + + CUTLASS_PRAGMA_UNROLL + for (int w = 0; w < Iterations::kW; ++w) { + // Load the C matrix into fragment. + if (!kBetaIsZero_) { + iterator_load(global_load_iterator, fragment_c); + } + + // Make sure we can write to shared memory. + shared_load_fence(); + + // Copy the accumulators to shared memory. 
+ int const offset = (h * Iterations::kW + w) * SharedStoreIteratorD::Fragment::kElements; + + SharedStoreTransformerD shared_store_transformer; + typename SharedStoreTransformerD::OutputFragment shared_store_transformed_d; + shared_store_transformer.transform(accumulators, offset, shared_store_transformed_d); + + SharedStoreIteratorD shared_store_iterator(params.shared_store_iterator_d, + shared_storage.shared_stream.store); + shared_iterator_store(shared_store_iterator, shared_store_transformed_d); + + // Make sure the data is in shared memory. + shared_store_fence(); + + // Copy the accumulators back to registers from shared memory. + SharedLoadIteratorD shared_load_iterator(params.shared_load_iterator_d, + shared_storage.shared_stream.load); + typename SharedLoadIteratorD::Fragment fetched_d; + shared_iterator_load(shared_load_iterator, fetched_d); + + // Do the math. + typename GlobalTransformerD::InputFragment fragment_d; + + if (kBetaIsZero_) { + functor.evaluate(fetched_d, fragment_d); + } else { + // Transform C fragment. + transformer_c.transform(fragment_c, transformed_c); + // Do the math. + functor.evaluate(fetched_d, transformed_c, fragment_d); + } + + // Transform D fragment. + typename GlobalTransformerD::OutputFragment transformed_d; + transformer_d.transform(fragment_d, transformed_d); + + // Copy the results to global memory. + iterator_store(global_store_iterator, transformed_d); + } + } + } + + /// The memory fence for shared loads. + CUTLASS_DEVICE void shared_load_fence() { __syncthreads(); } + + /// The memory fence for shared stores. + CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); } + + /// The params. + Params const& params; + /// The shared storage. + SharedStorage& shared_storage; + /// The dimensions of the GEMM. 
+ Index m, n; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_epilogue_traits.h b/cutlass/gemm/gemm_epilogue_traits.h new file mode 100644 index 0000000000..c06fc25026 --- /dev/null +++ b/cutlass/gemm/gemm_epilogue_traits.h @@ -0,0 +1,331 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of the GEMM epilogue. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The output tile. + typename OutputTile_, + /// The accumulators. + typename Accumulators_, + /// The iterator to load C from global memory. + typename GlobalLoadIteratorC_, + /// The transformer for C. + typename GlobalTransformerC_, + /// The transformer for D. + typename GlobalTransformerD_, + /// The iterator to store D to global memory. + typename GlobalStoreIteratorD_, + /// The iterator to store D to shared memory. + typename SharedStoreIteratorD_, + /// The shared store transformer for D. + typename SharedStoreTransformerD_, + /// The iterator to load D from shared memory. + typename SharedLoadIteratorD_, + /// The number of iterations in the epilogue. + typename Iterations_, + /// The iterations strides. + typename Delta_, + /// The functor to be used in the epilogue. + typename Functor_, + /// The index. + typename Index_ = int> +struct GemmEpilogueTraits { + // + /// The output tile. + typedef OutputTile_ OutputTile; + /// The number of iterations. + /// The accumulators. 
+ typedef Accumulators_ Accumulators; + /// The iterator for C in global memory. + typedef GlobalLoadIteratorC_ GlobalLoadIteratorC; + /// The transformer for C. + typedef GlobalTransformerC_ GlobalTransformerC; + /// The transformer for D. + typedef GlobalTransformerD_ GlobalTransformerD; + /// The iterator for D in global memory. + typedef GlobalStoreIteratorD_ GlobalStoreIteratorD; + /// The iterator to store D in shared memory. + typedef SharedStoreIteratorD_ SharedStoreIteratorD; + /// The shared store transformer for D. + typedef SharedStoreTransformerD_ SharedStoreTransformerD; + /// The iterator to load D from shared memory. + typedef SharedLoadIteratorD_ SharedLoadIteratorD; + /// The number of iterations in the epilogue. + typedef Iterations_ Iterations; + /// The iteration strides. + typedef Delta_ Delta; + + /// The functor in charge of the math. + typedef Functor_ Functor; + /// The index. + typedef Index_ Index; + + /// We do not support 3D or 4D shapes. + static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes"); + + /// The scalar. + typedef typename Functor::Scalar Scalar; + /// The scalar for C. + typedef typename GlobalLoadIteratorC::Scalar ScalarC; + /// The scalar for D. + typedef typename GlobalStoreIteratorD::Scalar ScalarD; + + /// The params. + struct Params { + /// The strides for H and W in the different iterations of the epilogue. + Index stride_h, stride_w; + /// The params for the C iterator. + typename GlobalLoadIteratorC::Params iterator_c; + /// The params for the D global iterator. + typename GlobalStoreIteratorD::Params iterator_d; + /// The params for the D shared store iterator. + typename SharedStoreIteratorD::Params shared_store_iterator_d; + /// The params for the D shared load iterator. + typename SharedLoadIteratorD::Params shared_load_iterator_d; + /// The functor params. + typename Functor::Params functor; + + /// Setup the params.
+ template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + // The parameters for the functor. + int error_code = functor.initialize(desc); + if (error_code) { + return error_code; + } + + // At the end of the H iteration, we jump over a number of columns. + this->stride_h = desc.ldd * Delta::kH; + // Nothing to do here. + this->stride_w = 0; + + // Setup the params for the global memory iterator for C. + error_code = iterator_c.initialize( + reinterpret_cast(desc.d_c), desc.ldc, desc.n, stride_w, Delta::kW); + if (error_code) { + return error_code; + } + + // Setup the params for the global memory iterator for D. + return iterator_d.initialize( + reinterpret_cast(desc.d_d), desc.ldd, desc.n, stride_w, Delta::kW); + } + }; + + /// The shared memory storage to exchange data. + union StreamSharedStorage { + // The storage for the store iterator. + typename SharedStoreIteratorD::SharedStorage store; + // The storage for the load iterator. + typename SharedLoadIteratorD::SharedStorage load; + }; + + /// The shared memory to swizzle the data in the epilogue. + struct SharedStorage { + // The storage for the shared stream D. + StreamSharedStorage shared_stream; + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmEpilogueTraitsHelper { + /// The scalar. + typedef typename EpilogueFunctor_::Scalar Scalar; + /// The output tile. + typedef typename GemmConfig_::OutputTile OutputTile; + + /// The number of iterations in the epilogue. + typedef Shape<1, + GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / + GemmConfig_::kAccumulatorsPerLdsB, + GemmConfig_::kAccumulatorsPerLdsB> + Iterations; + // The iteration strides in the H/W dimension. + typedef Shape<0, + GemmConfig_::kAccumulatorsPerLdsB*( + GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), + 0> + Delta; + /// The functor to do the math in the epilogue.
+ typedef EpilogueFunctor_ Functor; + + /// The traits class to build the iterator to store to shared memory for D. + typedef GemmSharedStoreTileDTraits< + // The pointer is float. + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsD, + // The skew -- 128 / sizeof(ScalarD) / kScalarsPerStsD is the number of threads involved in + // a single STS. We divide by 2 as our objective is to add a skew to the odd threads to + // avoid bank conflicts between odd and even threads. + 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * + GemmConfig_::kScalarsPerStsD> + SharedStoreTileTraits; + + /// The iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + + /// The shared store transformer for D. + typedef Copy SharedStoreTransformerD; + + /// The traits class to build the iterator to load from shared memory for D. + typedef GemmSharedLoadTileDTraits< + // The pointer is float. + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The number of columns of the output tile written by iteration. + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsD, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; + + /// The iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; + + /// The traits class to build the iterator to load data from global memory for C^N. + typedef GemmGlobalTileCdTraits< + // The pointer is float const. 
+ typename GemmConfig_::ScalarC const, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // How many elements do we jump over at each iteration? + Iterations::kW, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgC> + GlobalLoadTileTraits; + + /// The iterator to load C. + typedef GemmGlobalIteratorCd GlobalLoadIteratorC; + /// The transformer for C. + typedef Copy GlobalTransformerC; + + /// The traits class to build the iterator to store data to global memory for D^N. + typedef GemmGlobalTileCdTraits< + // The pointer is float. + typename GemmConfig_::ScalarD, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // How many elements do we jump over at each iteration? + Iterations::kW, + // The number of scalars per STG (STG.32 or STG.128, etc). + GemmConfig_::kScalarsPerStgD> + GlobalStoreTileTraits; + + /// The iterator to store D. + typedef GemmGlobalIteratorCd GlobalStoreIteratorD; + /// The transformer for D. + typedef Copy GlobalTransformerD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The GEMM config. + typename GemmConfig_, + /// The epilogue functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The index. + typename Index_ = int, + /// The helper to create the traits class. + typename Helper_ = GemmEpilogueTraitsHelper > +struct SimplifiedGemmEpilogueTraits : public GemmEpilogueTraits< + // The output tile.
+ typename GemmConfig_::OutputTile, + // The accumulators. + typename GemmConfig_::Accumulators, + // The global iterator for C. + typename Helper_::GlobalLoadIteratorC, + // The transformer for C. + typename Helper_::GlobalTransformerC, + // The transformer for D. + typename Helper_::GlobalTransformerD, + // The global iterator for D. + typename Helper_::GlobalStoreIteratorD, + // The iterator to store D to shared memory. + typename Helper_::SharedStoreIteratorD, + // The shared store transformer for D. + typename Helper_::SharedStoreTransformerD, + // The iterator to load D from shared memory. + typename Helper_::SharedLoadIteratorD, + // The number of iterations. + typename Helper_::Iterations, + // The strides between iterations. + typename Helper_::Delta, + // The functor to be used in the epilogue. + EpilogueFunctor_, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_global_stream.h b/cutlass/gemm/gemm_global_stream.h new file mode 100644 index 0000000000..194f0decf8 --- /dev/null +++ b/cutlass/gemm/gemm_global_stream.h @@ -0,0 +1,175 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements efficient loading of the thread block-level tile from global memory and + storing + to shared memory. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename LoadIterator_, + /// The store iterator to copy to shared memory. + typename StoreIterator_, + /// The transformer to be applied after the data has been copied from global memory. + typename Transformer_> + +struct GlobalLoadStreamBase { + /// The load iterator. + typedef LoadIterator_ LoadIterator; + /// The transformer. + typedef Transformer_ Transformer; + /// The store iterator to write to shared memory. + typedef StoreIterator_ StoreIterator; + + /// The fragment that is copied from global memory.
+ typedef typename LoadIterator::Fragment FetchedFragment; + /// The fragment that is obtained after the transformation by the transformer. + typedef typename Transformer::OutputFragment TransformedFragment; + /// Make sure the fragments match. + static_assert((platform::is_same::value), + ""); + /// The output fragment. + typedef TransformedFragment Fragment; + /// Make sure the transformed fragment is the same as the store fragment. + static_assert((platform::is_same::value), + ""); + + /// The layout. + static MatrixLayout::Kind const kLayout = LoadIterator::kLayout; + /// The scalar type of the iterator. + typedef typename LoadIterator::Scalar Scalar; + /// The pointer. + typedef typename LoadIterator::Pointer Pointer; + /// The index. + typedef typename LoadIterator::Index Index; + + /// The params. + struct Params { + // The load iterator. + typename LoadIterator::Params load_iterator; + // The store iterator. + typename StoreIterator::Params store_iterator; + + /// Setup the params. + CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld) { + int error_code = load_iterator.initialize(pointer, ld); + if (error_code) { + return error_code; + } + + return store_iterator.initialize(); + } + }; + + /// The amount of storage in shared memory needed to store the tile. + typedef typename StoreIterator::SharedStorage SharedStoreStorage; + + /// The storage in shared memory needed by that stream. + union SharedStorage { + // The load iterator. + typename LoadIterator::SharedStorage load_iterator; + // The store iterator. + SharedStoreStorage store_iterator; + }; + + /// Ctor. + CUTLASS_DEVICE GlobalLoadStreamBase(Params const& params, + SharedStorage& shared_storage, + Coord<3> const bounds, + Coord<3> const& block) + : load_iterator(params.load_iterator, bounds, block), + transformer(), + store_iterator(params.store_iterator, shared_storage.store_iterator) + + { + fetched_fragment.clear(); + } + + /// Load the data from shared memory to the fetch fragment. 
+ CUTLASS_DEVICE void copy() { iterator_load(load_iterator, fetched_fragment); } + + /// Commit the data. + CUTLASS_DEVICE void commit() { + transformer.transform(fetched_fragment, transformed_fragment); + iterator_store(store_iterator, transformed_fragment); + store_iterator.inc_stage(); + } + + /// Execute the residue code. + CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { + load_iterator.residue(k); + if (!skip_clear) { + fetched_fragment.clear(); + } + } + + /// The iterator. + LoadIterator load_iterator; + /// The fragment to fetch from shared memory. + FetchedFragment fetched_fragment; + /// The transformer. + Transformer transformer; + /// The fragment to convert the data after it has been fetched from shared memory. + TransformedFragment transformed_fragment; + /// The store iterator. + StoreIterator store_iterator; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename LoadIterator_, + /// The store iterator to copy to shared memory. + typename StoreIterator_, + /// The transformer to be applied after the data has been copied from global memory. + typename Transformer_ = Copy > + +struct GlobalLoadStream : public GlobalLoadStreamBase { + /// The base class. + typedef GlobalLoadStreamBase Base; + + /// Ctor. 
+ CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const& params, + typename Base::SharedStorage& shared_storage, + Coord<3> const& bounds, + Coord<3> const& block) + : Base(params, shared_storage, bounds, block) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_global_tile.h b/cutlass/gemm/gemm_global_tile.h new file mode 100644 index 0000000000..28bcc6a98c --- /dev/null +++ b/cutlass/gemm/gemm_global_tile.h @@ -0,0 +1,478 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterators for efficiently loading and storing to global memory. +*/ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// The following functor reshapes a tile of threads to match a tile of data. The idea is that when +// the user wants to build the iterator traits, he/she may want to specify the tile independently +// from the number of scalars loaded/stored per instruction. For example, in the row-major version +// with a tile of size 128x8 - the user may want the iterator to work with 32x8 threads if +// each thread loads 1 scalar per LDG. If the user changes to 4 scalars per LDG, then the tile of +// threads has to change. The code below detects that and corrects the configuration automatically - it is +// a helper when the user does not specify the right configuration.
+ +template +struct ReshapeThreads { + typedef Threads_ Threads; +}; + +template +struct ReshapeThreads { + typedef Shape Threads; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalTileTraits { + /// Identity of the operand + static GemmOperand::Kind const kOperand = kOperand_; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The number of scalars per LDG/STG. + static int const kAccessSize = kAccessSize_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal; + + /// The tile shape + typedef typename ReshapeTile::Tile Tile; + /// The threads shape + typedef typename ReshapeThreads::Threads Threads; + /// The relative offset between two elements in the H/W dimension in adjacent threads. + typedef Shape<1, 1, Tile::kC> ThreadsDelta; + + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH, Threads::kW * kAccessSize> Delta; + /// Strides for immediate offset computation + typedef Shape<0, 0, Threads::kW * ThreadsDelta::kW, kAccessSize> ImmediateOffsetStrides; + /// The number of iterations needed to load/store the tile.
+ typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize> + Iterations; + + typedef GemmMultiplicandTraits MultiplicandTraits; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalTileCdTraits : public GemmGlobalTileTraits { + /// The base class. + typedef GemmGlobalTileTraits + Base; + + /// The stride in the H dimension. + static int const kStrideH = kStrideH_; + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> Delta; + + typedef typename Base::Iterations Iterations; + + typedef typename Base::Threads Threads; + + typedef typename Base::ThreadsDelta ThreadsDelta; + + typedef typename Base::ImmediateOffsetStrides ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * kStrideH * Iterations::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalIteratorAb + : public TileLoadIterator { + /// This class. + typedef GemmGlobalIteratorAb This_; /// The base class. + + typedef TileLoadIterator + Base; + /// The layout.
+ static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + /// Fragment type loaded by the iterator + typedef typename Base::Fragment Fragment; + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The thread offset + typedef typename TileTraits_::ThreadOffset ThreadOffset; + /// Specifies in which dimension post-increment accesses advance. + static IteratorAdvance::Kind const kAdvance = Base::kAdvance; + + typedef cutlass::PredicateVector::kCount> PredicateVector; + + /// Iterator parameters type + typedef typename Base::Params BaseParams; + + struct Params : public BaseParams { + /// Initializes params to load a strip-mined tile, given pointer and stride_h. + CUTLASS_HOST_DEVICE int initialize(Scalar const* ptr, Index stride_h) { + Index inc_d = 0; + Index inc_advance = 0; + // Move by some columns for each iteration in the H dimension. + Index inc_h = Base::Delta::kH * stride_h; + + // Move by some more columns in the number of iterations if the D dimension is > 1. + if (Base::Delta::kD > 0) { + inc_d = Base::Delta::kD * stride_h - (Base::Iterations::kH - 1) * inc_h; + } + + // Move to the beginning of the next iteration.
+ if (kAdvance == IteratorAdvance::kH && Base::Delta::kD > 0) { + inc_advance = inc_d; + } else if (kAdvance == IteratorAdvance::kH) { + inc_advance = inc_h; + } else if (Base::Delta::kD > 0) { + inc_advance = (Base::Iterations::kW + 0) * ShapeCount::kWc - + (Base::Iterations::kH - 1) * inc_h - + (Base::Iterations::kD - 1) * Base::Delta::kD * stride_h; + } else { + inc_advance = (Base::Iterations::kW + 0) * ShapeCount::kWc - + (Base::Iterations::kH - 1) * inc_h; + } + + Base::Params::initialize(ptr, 0, stride_h, 0, inc_d, inc_h, 0, inc_advance); + return 0; + } + }; + + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + /// The parameters + Params params; + + CUTLASS_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block) { + // Setup the masks to control loads. + predicates.fill(0); + + int bounds_h, bounds_w; + if (kAdvance == IteratorAdvance::kH) { + bounds_w = bounds[2] - block[2]; + bounds_h = bounds[1]; + + } else { + bounds_w = bounds[1]; + bounds_h = bounds[2] - block[1]; + } + + // Fill in the bits of the predicate vector. + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + bool flag = w * Base::Delta::kW < bounds_w; + if (kAdvance == IteratorAdvance::kH) { + flag = flag && (h * Base::Delta::kH + d * Base::Delta::kD) < bounds_h; + } else { + flag = flag && (h * Base::Delta::kH) < bounds_h; + } + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + predicates.set(bit, flag); + } + } + } + } + } + + /// Ctor. + CUTLASS_DEVICE GemmGlobalIteratorAb(Params const& _params, + const Coord<3>& bounds, + const Coord<3>& block, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params) { + thread_offset = thread_offset_func(); + // The column. + Index block_h = thread_offset[1]; + // The contiguous dimension.
+ Index block_w = thread_offset[2]; + + // Add the blocks indices. + if (kAdvance == IteratorAdvance::kH) { + block_h += block[1]; + block_w += block[2]; + + } else { + block_h += block[2]; + block_w += block[1]; + } + + // Setup the pointer. + params.pointer += (block_h * params.stride_h + block_w); + + // Initialize predicates + initialize_predicates(bounds, make_Coord(0, block_h, block_w)); + } + + /// Increment the pointer in the H dimension. + CUTLASS_DEVICE void inc_h() { params.pointer += params.inc_h; } + /// Increment the pointer in the D dimension. + CUTLASS_DEVICE void inc_d() { params.pointer += params.inc_d; } + /// Increment the pointer to move to the next iteration. + CUTLASS_DEVICE void inc_advance() { params.pointer += params.inc_advance; } + + /// Returns the current pointer + CUTLASS_HOST_DEVICE + Scalar const* data() const { return params.pointer; } + + /// That's the residue! Clear the predicate of every access whose offset (in H when advancing along H, in W otherwise) is >= k. + CUTLASS_DEVICE void residue(Index k) { + // The coordinates of the thread. + Index block_h = thread_offset[1]; + // The contiguous dimension. + Index block_w = thread_offset[2]; + + // Update the predicate vector. + for (int d = 0; d < Base::Iterations::kD; ++d) { + for (int h = 0; h < Base::Iterations::kH; ++h) { + for (int w = 0; w < Base::Iterations::kW; ++w) { + for (int c = 0; c < Base::Iterations::kC; ++c) { + Index offset = 0; + if (kAdvance == IteratorAdvance::kH) { + offset += block_h + h * Base::Delta::kH + d * Base::Delta::kD; + } else { + offset += block_w + w * Base::Delta::kW; + } + + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + if (offset >= k) { + predicates.set(bit, false); + } + } + } + } + } + } + + /// Is the access at iteration (d, h, w, c) valid? Returns its bit of the predicate vector. + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + return predicates[bit]; + } + + /// The predicates.
+ PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmGlobalIteratorCd : public TileIteratorBase { + /// This class. + typedef GemmGlobalIteratorCd This_; + /// The base class. + typedef TileIteratorBase + Base; + + /// The layout. + static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The pointer. + typedef typename TileTraits_::Pointer Pointer; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The thread offset + typedef typename TileTraits_::ThreadOffset ThreadOffset; + + /// The params. + struct Params { + /// The pointer. + Pointer pointer; + /// The stride in the H dimension to setup the thread in the block. + Index stride_h; + /// The strides to increment the pointer. + Index inc_advance, inc_h; + /// The strides to increment the predicate offset + Index predicate_inc_advance, predicate_inc_h; + /// The column offset to compute the predicate for the columns. + Index predicate_offset; + + /// Setup the params. + CUTLASS_HOST_DEVICE int initialize( + Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w) { + // The pointer. + this->pointer = pointer; + // Each column of the matrix. + stride_h = TileTraits_::ThreadsDelta::kH * ld; + // Each thread output 1 column per iteration. The stride between columns is given by the + // number of scalars that are loaded per LDS for B.
+ inc_h = ld * TileTraits_::kStrideH; + inc_advance = + (ld - ld * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w; + + predicate_offset = bound; + predicate_inc_h = TileTraits_::kStrideH; + predicate_inc_advance = + -((TileTraits_::kStrideH * (Base::Iterations::kH - 1) - 1) + epilogue_delta_w); + + return 0; + } + }; + + Params params; + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + + /// Ctor. + CUTLASS_DEVICE GemmGlobalIteratorCd() {} + + /// Ctor. + CUTLASS_DEVICE GemmGlobalIteratorCd(Params const& params, + const Coord<3>& bounds, + const Coord<3>& block, + int offset = 0, + int pred_offset = 0, + ThreadOffset thread_offset_func = ThreadOffset()) + : params(params) { + thread_offset = thread_offset_func(); + // Each warp works on a different column of the tile. + int const h = thread_offset[1] + block[1]; + // Each lane writes a different element. + int const w = thread_offset[2] + block[2]; + // Setup the pointer. + this->params.pointer += ((h * params.stride_h + w) + offset); + + // Prepare the vector of predicates. + for (int i = 0; i < Base::Iterations::kW; ++i) { + predicates.set(i, w + i * Base::Delta::kW < bounds[2]); + } + this->params.predicate_offset -= (h + pred_offset); + } + + /// Increment the pointer in the C dimension. + CUTLASS_DEVICE void inc_c() {} + /// Increment the pointer in the W dimension. + CUTLASS_DEVICE void inc_w() {} + /// Increment the pointer in the H dimension. + CUTLASS_DEVICE void inc_h() { + params.pointer += params.inc_h; + params.predicate_offset -= params.predicate_inc_h; + } + /// Increment the pointer in the D dimension. + CUTLASS_DEVICE void inc_d() {} + /// Increment the pointer to move to the next iteration. + CUTLASS_DEVICE void inc_advance() { + params.pointer += params.inc_advance; + this->params.predicate_offset -= params.predicate_inc_advance; + } + + /// Test the validity of the iterator: the predicate for w must be set and predicate_offset must be positive (d, h and c are not used).
+ CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + return predicates.at(w) && params.predicate_offset > 0; + } + + /// Returns the raw pointer + CUTLASS_HOST_DEVICE + Pointer data() { return params.pointer; } + + CUTLASS_HOST_DEVICE + Pointer const data() const { return params.pointer; } + + /// The predicates for the row. + cutlass::PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_operand.h b/cutlass/gemm/gemm_operand.h new file mode 100644 index 0000000000..737f993f01 --- /dev/null +++ b/cutlass/gemm/gemm_operand.h @@ -0,0 +1,141 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear + memory. +*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Helper to describe attributes of GEMM matrix operands +template +struct GemmOperandTraitsAb { + static const bool Congruous = + (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GetExtent; + +template +struct GetExtent { + static const int kExtent = Tile_::kW; +}; + +template +struct GetExtent { + static const int kExtent = Tile_::kH; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Determines the shape of a multiplicand tile in terms of strided (H) and contiguous (W) +/// dimensions +template +struct GemmMultiplicandTraits { + // Only defined for A or B + static_assert(Usage == GemmOperand::kA || Usage == GemmOperand::kB, + "MultiplicandTileShape defined only for A or B operands."); + + /// Shape of GEMM thread block tile (K, N, M) + typedef ThreadBlockTile_ ThreadBlockTile; + + /// Identifies multiplicand + static
GemmOperand::Kind const kUsage = Usage; + + /// Layout of tile + static MatrixLayout::Kind const kLayout = Layout; + + // True if K is the strided dimension + static bool const kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor); + + /// Map the ThreadBlockShape onto (kH, kW) dimensions for the A and B operands + typedef typename platform::conditional< + kKstrided, + Shape<1, ThreadBlockTile::kD, GetExtent::kExtent>, + Shape<1, GetExtent::kExtent, ThreadBlockTile::kD> >::type Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Projects a coordinate (K, N, M) onto inner and outer dimensions defined for each +/// operand. +template +struct ProjectOperand; + +/// Project A operand - (0, K, M) if K is strided, (0, M, K) otherwise +template +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { + if (Kstrided) { + return make_Coord(0, coord[0], coord[2]); + } else { + return make_Coord(0, coord[2], coord[0]); + } + } +}; + +/// Project B operand - (0, K, N) if K is strided, (0, N, K) otherwise +template +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { + if (Kstrided) { + return make_Coord(0, coord[0], coord[1]); + } else { + return make_Coord(0, coord[1], coord[0]); + } + } +}; + +/// Project C operand - (0, N, M) +template <> +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); } +}; + +/// Project D operand - (0, N, M) +template <> +struct ProjectOperand { + CUTLASS_HOST_DEVICE + static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_shared_stream.h b/cutlass/gemm/gemm_shared_stream.h new file mode 100644 index 0000000000..c6ff7bd973 --- /dev/null +++
b/cutlass/gemm/gemm_shared_stream.h @@ -0,0 +1,113 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines abstractions for managing loading and storing fragments to shared memory in the + efficient GEMM pipeline. 
+*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The load iterator. + typename Iterator_, + /// The transformer to be applied after the data has been copied from shared memory. + typename Transformer_ = Copy > + +struct SharedLoadStream { + /// The load iterator. + typedef Iterator_ Iterator; + /// The transformer. + typedef Transformer_ Transformer; + + /// The fragment that is copied from shared memory. + typedef typename Iterator::Fragment FetchedFragment; + /// The fragment that is obtained after the transformation by the transformer. + typedef typename Transformer::OutputFragment TransformedFragment; + /// Make sure the fragments match. + static_assert((platform::is_same::value), + ""); + /// The output fragment. + typedef TransformedFragment Fragment; + + /// The params. + struct Params { + /// The iterator params. + typename Iterator::Params iterator; + + /// Setup the params. + CUTLASS_HOST_DEVICE int initialize() { return iterator.initialize(); } + }; + + /// The storage in shared memory needed by that stream. + typedef typename Iterator::Storage SharedStorage; + + /// Ctor. + CUTLASS_DEVICE SharedLoadStream() {} + + /// Ctor. + CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage) { + this->initialize(params, shared_storage); + } + + /// Initialize the stream. + CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage) { + // The iterator. + iterator = Iterator(params.iterator, shared_storage); + // The transformer. + transformer = Transformer(); + } + + /// Load the data from shared memory to the fetch fragment. + CUTLASS_DEVICE void copy(FetchedFragment &fetched) { shared_iterator_load(iterator, fetched); } + + /// Load the data from shared memory to the fetch fragment.
+ CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched) { + shared_iterator_load(iterator, fetched, d); + } + + /// Commit the data: apply the transformer to the fetched fragment to produce the transformed fragment. + CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed) { + transformer.transform(fetched, transformed); + } + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { iterator.inc_stage(); } + + /// The iterator. + Iterator iterator; + /// The transformer. + Transformer transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_shared_tile.h b/cutlass/gemm/gemm_shared_tile.h new file mode 100644 index 0000000000..9ec4c9a271 --- /dev/null +++ b/cutlass/gemm/gemm_shared_tile.h @@ -0,0 +1,406 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterators for efficiently loading and storing tiles to and from shared memory. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreTileAbTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The threads. + typedef Threads_ Threads; + /// The strides to compute the base position of the thread. + typedef Shape<0, ShapeCount::kWc, Tile::kC, kScalarsPerSts_> ThreadsStrides; + /// The skew. + static int const kSkew = 0; + /// The number of scalars per STS. + static int const kAccessSize = kScalarsPerSts_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, + Tile::kH / Threads::kH, + Tile::kW / Threads::kW, + Tile::kC / Threads::kC / kAccessSize> + Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kAccessSize> Delta; + /// Strides for immediate offset computation (same shape as Delta).
+ typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kAccessSize> + ImmediateOffsetStrides; + + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreWithSkewTileAbTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skews. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile, + kScalarsPerSts_>::Tile Tile; + /// The threads. + typedef Threads_ Threads; + /// The skew. + static int const kSkew = kSkew_; + /// The number of scalars per STS. + static int const kAccessSize = kScalarsPerSts_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, ShapeCount::kWc, Threads::kH * kAccessSize> Delta; + /// Strides for immediate offset computation (same shape as Delta). + typedef Shape<0, ShapeCount::kWc, Threads::kH * kAccessSize> ImmediateOffsetStrides; + + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; + + protected: + /// The strides to compute the base position of the thread.
+ typedef Shape<0, kScalarsPerSts_, ShapeCount::kHwc / Threads::kW> ThreadsStrides; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileATraits { + static GemmOperand::Kind const kOperand = GemmOperand::kA; + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skew. + typedef Shape::kExtent * InstructionShape_::kD> + TileWithoutSkew_; + /// The tile with skew. + typedef Shape TileWithSkew; + /// The tile without skew after reshaping. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The threads in a warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + // static int const kScalarsPerLds = kScalarsPerLds_; + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of warps. + static int const kWarps = GetExtent::kExtent; + /// The number of threads in one dimension of the warp. + static int const kThreadsPerWarp = GetExtent::kExtent; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kScalarsPerLds*/> + Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape + ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // Extract the warp.
+ int const warp = threadIdx.x / kWarpSize % Warps::kW; + // Compute the row offset for each thread + int const lane = (threadIdx.x & 0x0e) / 2; + // The offset. + int const offset = (warp * ThreadsPerWarp::kW + lane) * kAccessSize; + + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileBTraits { + static GemmOperand::Kind const kOperand = GemmOperand::kB; + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The tile without skew. + typedef Shape::kExtent * InstructionShape_::kD> + TileWithoutSkew_; + /// The tile with skew. + typedef Shape TileWithSkew; + /// The tile without skew after reshaping. + typedef typename ReshapeTile::Tile TileWithoutSkew; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The threads in a warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of warps. + static int const kWarps = GetExtent::kExtent; + /// The number of threads in one dimension of the warp. + static int const kThreadsPerWarp = GetExtent::kExtent; + + /// The number of iterations needed to load/store the tile. + typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kAccessSize*/> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores.
+ typedef Shape + ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The position of the warp. + int const warp = threadIdx.x / (Warps::kW * kWarpSize); + + // Compute the column offset for each thread + int const lane = (threadIdx.x & 0x10) / 8 + (threadIdx.x & 0x01); + // The offset. + int const offset = (warp * ThreadsPerWarp::kH + lane) * kAccessSize; + + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedStoreTileDTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The dimension of the output tile. + typedef OutputTile_ OutputTile; + /// The warps in the tile. + typedef Warps_ Warps; + /// The threads in the warps. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per STS. + static int const kAccessSize = kScalarsPerSts_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of scalars per thread. + static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + /// The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). + static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew; + + /// The tile. + typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> Tile; + /// The number of iterations needed to store the tile. + typedef Shape<1, 1, kScalarsPerThread / kAccessSize> Iterations; + /// The strides in each dimension between different loads/stores.
+ typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // We issue STS.128 in the epilogue to store the accumulators to shared memory. When we use + // STS.128, we have to guarantee that threads in groups of 8 do not have bank conflicts (i.e + // they write to different banks). + + // Odd threads go to the second half of shared memory. + int const row = threadIdx.x & 0x01; + + int const warp_id = (threadIdx.x >> 5); + + int const warp_row = (warp_id % Warps::kW); + int const warp_col = (warp_id / Warps::kW); + + int hi_halfwarp_offset = OutputTile::kW * ((threadIdx.x >> 4) & 1); + int lo_halfwarp_offset = (((threadIdx.x >> 1) & 0x7) + warp_row * ThreadsPerWarp::kW); + + int col = kAccessSize * lo_halfwarp_offset + + warp_col * (ThreadsPerWarp::kH / 2) * OutputTile::kW + hi_halfwarp_offset; + + int offset = row * kScalarsPerRow + col; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmSharedLoadTileDTraits { + /// The scalar. + typedef typename platform::remove_const::type Scalar; + /// The pointer. + typedef Scalar_* Pointer; + /// The dimension of the output tile. + typedef OutputTile_ OutputTile; + /// The warps in the tile. + typedef Warps_ Warps; + /// The threads in the warps. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of scalars per LDS. + static int const kAccessSize = kScalarsPerLds_; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The number of scalars per thread.
+ static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + /// The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). + static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew; + + /// The tile. + typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> Tile; + + // Compute the number of iterations per warp in the Tile::kH dimension. + static int const kIterationsInHPerWarp = kTileH_ / ShapeCount::kCount; + + // As shown above, the shared memory tile is composed of 2 rows and each row is made of + // kScalarsPerRow. A warp is expected to read from the 1st row, then move to the 2nd row and go + // back to the 1st row. To model that scheme we define the Iterations shape as Shape. + // However, in some cases, we have only 1 iteration per warp. In that case, we must define the + // shape as Shape<1, 1, ...>. The following code does that. + static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2; + // As soon as we know kIterationsH, it is trivial to compute kIterationsD: + static int const kIterationsD = kIterationsInHPerWarp / kIterationsH; + + /// The number of iterations needed to load the tile. + typedef Shape Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape ImmediateOffsetStrides; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // Each warp works on a different column. + int const h = threadIdx.x / kWarpSize; + // Compute the row.
+ int const w = (threadIdx.x & (kWarpSize - 1)) * kAccessSize; + int offset = 0; + if (Iterations::kH == 1) { + int const row = h & 0x1; + int const col = h / 2; + offset = row * ShapeCount::kWc + col * OutputTile::kW * Iterations::kD + w; + } else { + offset = h * OutputTile::kW * Iterations::kD + w; + } + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/gemm_traits.h b/cutlass/gemm/gemm_traits.h new file mode 100644 index 0000000000..7a77d4b0d9 --- /dev/null +++ b/cutlass/gemm/gemm_traits.h @@ -0,0 +1,747 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of complete GEMM computation. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The scalar type for A. + typename ScalarA_, + /// The scalar type for B. + typename ScalarB_, + /// The scalar type for C. + typename ScalarC_, + /// The scalar type for D. + typename ScalarD_, + /// The output tile size for the GEMM KxNxM. + typename OutputTile_, + /// The functor to do the math. + typename MultiplyAdd_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_, + /// The number of scalars per STS for A. + int kScalarsPerStsA_, + /// The number of scalars per LDS for A. + int kScalarsPerLdsA_, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_, + /// The number of scalars per STS for B. + int kScalarsPerStsB_, + /// The number of scalars per LDS for B. + int kScalarsPerLdsB_, + /// The number of scalars per LDG for C and STG for D. + int kScalarsPerLdgCAndStgD_, + /// The number of scalars per STS for D. + int kScalarsPerStsD_, + /// The number of scalars per LDS for D.
+ int kScalarsPerLdsD_, + /// The number of stages in shared memory to do single/double/triple-buffering. + int kStages_> + +struct GemmConfig { + // + /// The scalar for A. + typedef ScalarA_ ScalarA; + /// The scalar for B. + typedef ScalarB_ ScalarB; + /// The scalar for C. + typedef ScalarC_ ScalarC; + /// The scalar for D. + typedef ScalarD_ ScalarD; + + /// The tile. + typedef OutputTile_ OutputTile; + /// The functor to do D = A*B + C. + typedef MultiplyAdd_ MultiplyAdd; + /// The shape of the instruction. + typedef typename MultiplyAdd::InstructionShape InstructionShape; + /// The number of accumulators per warp. + typedef typename MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp; + /// The accumulators. + typedef typename MultiplyAdd::Accumulators Accumulators; + + /// The number of warps. + typedef typename ShapeDiv::Shape Warps; + /// The default warp size (32 threads per warp). + static int const kWarpSize = cutlass::kWarpSize; + /// The number of threads. + static int const kThreads = ShapeCount::kCount * kWarpSize; + + /// The number of scalars per LDG/STS/LDS for A. + static int const kScalarsPerLdgA = kScalarsPerLdgA_; + static int const kScalarsPerStsA = kScalarsPerStsA_; + static int const kScalarsPerLdsA = kScalarsPerLdsA_; + + /// The number of scalars per LDG/STS/LDS for B. + static int const kScalarsPerLdgB = kScalarsPerLdgB_; + static int const kScalarsPerStsB = kScalarsPerStsB_; + static int const kScalarsPerLdsB = kScalarsPerLdsB_; + + /// The number of scalars per LDG for C. + static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_; + + /// The number of scalars per STS/LDS/STG for D. + static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_; + static int const kScalarsPerStsD = kScalarsPerStsD_; + static int const kScalarsPerLdsD = kScalarsPerLdsD_; + + /// The number of accumulators that are going to be fed from one LDS A/B.
+ static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD; + static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD; + + /// The number of stages in shared memory to implement double, triple, more-buffering. + static int const kStages = kStages_; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for A^N. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is column-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). 
+ GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^N. + typedef GemmSharedLoadTileATraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsA, + // The skew. + 0> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size MxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The number of scalars in 4B. + static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 
1 : 4 / sizeof(MultiplyAddScalar); + /// The traits class to build the iterator to store data to shared memory for A^T. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsA, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA / + GlobalTileTraits::Threads::kW * kScalarsIn4B> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^T. + typedef GemmSharedLoadTileATraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsA, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. 
+ typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is column-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size NxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The number of scalars in 4B. + static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar); + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS. + GemmConfig_::kScalarsPerStsB, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB / + GlobalTileTraits::Threads::kW * kScalarsIn4B> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^N. + typedef GemmSharedLoadTileBTraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction.
+ typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsB, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct GemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// The traits class to build the iterator to load data from global memory for B^T. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^T. + typedef GemmSharedStoreTileAbTraits< + // The pointer is float. + MultiplyAddScalar, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^T. + typedef GemmSharedLoadTileBTraits< + // The pointer is float const. + MultiplyAddScalar const, + // The output tile size. 
+ typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsB, + // The skew. + 0> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The GEMM configuration. + typename GemmConfig_, + /// The stream to load A from global memory to shared memory. + typename GlobalLoadStreamA_, + /// The stream to load B from global memory to shared memory. + typename GlobalLoadStreamB_, + /// The stream to load A from shared memory. + typename SharedLoadStreamA_, + /// The stream to load B from shared memory. + typename SharedLoadStreamB_, + /// The epilogue. + typename Epilogue_, + /// The block swizzle to reorganize the grid. + typename BlockSwizzle_ = IdentityBlockSwizzle, + /// The index. + typename Index_ = int, + /// The tool used to clear accumulators. + typename ClearAccumulators_ = ClearAccumulators > + +struct GemmTraits { + /// The configuration. + typedef GemmConfig_ GemmConfig; + /// The output tile. + typedef typename GemmConfig::OutputTile OutputTile; + + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStreamA_ GlobalLoadStreamA; + /// The layout of A. + static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout; + /// The scalar for A. + typedef typename GlobalLoadStreamA_::Scalar ScalarA; + + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStreamB_ GlobalLoadStreamB; + /// The layout of B. + static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout; + /// The scalar for B. 
+ typedef typename GlobalLoadStreamB_::Scalar ScalarB; + + /// The iterator for A to load from shared memory. + typedef SharedLoadStreamA_ SharedLoadStreamA; + /// The iterator for B to load from shared memory. + typedef SharedLoadStreamB_ SharedLoadStreamB; + + /// The shared storage for A. + typedef typename GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA; + // Btw, make sure we did not mess up the size of the storage. + static_assert(sizeof(SharedStoreStorageA) == sizeof(typename SharedLoadStreamA::SharedStorage), + ""); + + /// The shared storage for B. + typedef typename GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB; + // Btw, make sure we did not mess up the size of the storage. + static_assert(sizeof(SharedStoreStorageB) == sizeof(typename SharedLoadStreamB::SharedStorage), + ""); + + /// The multiply-add functor. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The epilogue. + typedef Epilogue_ Epilogue; + /// The scalars in the epilogue. + typedef typename Epilogue::ScalarC ScalarC; + typedef typename Epilogue::ScalarD ScalarD; + + /// The block swizzle to reorganize the grid. + typedef BlockSwizzle_ BlockSwizzle; + /// The index. + typedef Index_ Index; + /// Clear the accumulators. + typedef ClearAccumulators_ ClearAccumulators; + + /// The params. + struct Params { + /// The dimensions of the GEMM. + Index m, n, k; + /// The params for the A stream. + typename GlobalLoadStreamA::Params global_stream_a; + /// The params for the B stream. + typename GlobalLoadStreamB::Params global_stream_b; + /// The params for the A stream from shared memory. + typename SharedLoadStreamA::Params shared_stream_a; + /// The params for the B stream from shared memory. + typename SharedLoadStreamB::Params shared_stream_b; + /// The params for the epilogue. + typename Epilogue::Params epilogue; + + /// Initialize the parameters. + template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + // Set the problem size.
+ this->m = desc.m; + this->n = desc.n; + this->k = desc.k; + + // Initialize the iterator for A. + int error_code = + global_stream_a.initialize(reinterpret_cast(desc.d_a), desc.lda); + + if (error_code) { + return error_code; + } + + // Initialize the iterator for B. + error_code = global_stream_b.initialize(reinterpret_cast(desc.d_b), desc.ldb); + + if (error_code) { + return error_code; + } + + // The epilogue. + return epilogue.initialize(desc); + } + }; + + // The storage for one operand stream (used for both A and B). + template + union StreamSharedStorage { + // The storage needed by the global stream. + typename GlobalLoadStream_::SharedStorage global; + // The storage needed by the shared stream. + typename SharedLoadStream_::SharedStorage shared; + }; + + // The storage for the main loop + prologue. + struct MainLoopSharedStorage { + // The storage to shuffle the A matrix in shared memory. + StreamSharedStorage stream_a; + // The storage to shuffle the B matrix in shared memory. + StreamSharedStorage stream_b; + // The storage to clear the accumulators if needed. + typename ClearAccumulators::SharedStorage clear; + }; + + /// The storage in shared memory. + union SharedStorage { + // The storage for the main loop. + MainLoopSharedStorage main_loop; + // The storage for the epilogue. + typename Epilogue::SharedStorage epilogue; + }; + + /// Assemble the global load streams for A/B. + struct GlobalLoadStream { + /// Ctor. + CUTLASS_DEVICE GlobalLoadStream(Params const& params, + SharedStorage& shared_storage, + dim3 const& block) + : stream_a(params.global_stream_a, + shared_storage.main_loop.stream_a.global, + cutlass::make_Coord(0, params.k, params.m), + cutlass::make_Coord(0, 0, block.x)), + stream_b(params.global_stream_b, + shared_storage.main_loop.stream_b.global, + cutlass::make_Coord(0, params.k, params.n), + make_Coord(0, 0, block.y)) {} + + /// Trigger the copies of the global load streams for A and B.
+ CUTLASS_DEVICE void copy() { + stream_a.copy(); + stream_b.copy(); + } + + /// Commit the data. + CUTLASS_DEVICE void commit() { + stream_a.commit(); + stream_b.commit(); + } + + /// Execute the residue code. + CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) { + stream_a.residue(k, skip_clear); + stream_b.residue(k, skip_clear); + } + + /// The stream for A. + GlobalLoadStreamA stream_a; + /// The stream for B. + GlobalLoadStreamB stream_b; + }; + + /// Assemble the shared load stream for A/B. + struct SharedLoadStream { + /// Ctor. + CUTLASS_DEVICE SharedLoadStream(Params const& params, SharedStorage& shared_storage) { + stream_a.initialize(params.shared_stream_a, shared_storage.main_loop.stream_a.shared); + stream_b.initialize(params.shared_stream_b, shared_storage.main_loop.stream_b.shared); + } + + /// Trigger the copies from shared memory to registers. + CUTLASS_DEVICE void copy(int step) { + stream_a.copy(step, fetched_a[step % 2]); + stream_b.copy(step, fetched_b[step % 2]); + } + + /// Commit the data. + CUTLASS_DEVICE void commit(int step) { + stream_a.commit(fetched_a[step % 2], transformed_a[step % 2]); + stream_b.commit(fetched_b[step % 2], transformed_b[step % 2]); + } + + /// The fragment A. + CUTLASS_DEVICE typename SharedLoadStreamA::Fragment const& fragment_a(int step) const { + return transformed_a[step % 2]; + } + + /// The fragment B. + CUTLASS_DEVICE typename SharedLoadStreamB::Fragment const& fragment_b(int step) const { + return transformed_b[step % 2]; + } + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { + stream_a.inc_stage(); + stream_b.inc_stage(); + } + + /// The stream for A. + SharedLoadStreamA stream_a; + /// The fragments to fetch A. + typename SharedLoadStreamA::FetchedFragment fetched_a[2]; + /// The fragments to transform A. + typename SharedLoadStreamA::TransformedFragment transformed_a[2]; + /// The stream for B. + SharedLoadStreamB stream_b; + /// The fragments to fetch B. 
+ typename SharedLoadStreamB::FetchedFragment fetched_b[2]; + /// The fragments to transform B. + typename SharedLoadStreamB::TransformedFragment transformed_b[2]; + }; + + /// The memory fence for shared loads. + static CUTLASS_DEVICE void shared_load_fence(bool in_loop) { + if (SharedLoadStreamA::Iterator::kRequiresLoadFence || + SharedLoadStreamB::Iterator::kRequiresLoadFence) { + __syncthreads(); + } + } + + /// The memory fence for shared stores. + static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { __syncthreads(); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct SimplifiedGemmTraitsHelper { + /// The global iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The data converter for A before storing to shared memory. + typedef Copy GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The global iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + /// The data converter for B before storing to shared memory. + typedef Copy GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. 
+ typedef SharedLoadStream SharedLoadStreamB; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The config for the GEMM. + typename GemmConfig_, + /// The epilogue. + typename Epilogue_, + /// The index. + typename Index_ = int, + // The configuration for the A matrix. + typename GemmTileTraitsHelperA_ = GemmTileTraitsHelperA, + // The configuration for the B matrix. + typename GemmTileTraitsHelperB_ = GemmTileTraitsHelperB, + // The helper class to create the streams and iterators. + typename Helper_ = + SimplifiedGemmTraitsHelper > +struct SimplifiedGemmTraits : public GemmTraits< + // The config. + GemmConfig_, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + Epilogue_, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + ClearAccumulators > { +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/grid_raster.h b/cutlass/gemm/grid_raster.h deleted file mode 100644 index 1f9e585e06..0000000000 --- a/cutlass/gemm/grid_raster.h +++ /dev/null @@ -1,436 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for enumerating \p block_task within an input matrix - */ - -#include - -#include "../util/util.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * grid_raster_strategy - ******************************************************************************/ - -/** - * \brief Strategies for enumerating \p block_task within an input matrix - */ -struct grid_raster_strategy -{ - /// \brief Enumerants - enum kind_t - { - /** - * Default \p block_task assignment (currently ColumnMajor for N*, - * RowMajor for TT, and TiledCohort for TN) - */ - Default, - - /** - * Column-major \p block_task assignment - */ - ColumnMajor, - - /** - * Row-major \p block_task assignment - */ - RowMajor, - - /** - * Two-level \p block_task assignment (both column-major) - */ - TiledCohort, - }; -}; - - - -/****************************************************************************** - * grid_raster - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * - * NB: This generic class is not directly constructible. Algorithm-specific - * template specializations will provide the API functionality prescribed here. 
- */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB, ///< View transform enumerant for matrix B - grid_raster_strategy::kind_t RasterStrategy> ///< Strategy for enumerating \p block_task within an input matrix -struct grid_raster -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - grid_raster(); - - /// Whether the thread block base coordinates are out-of-bounds for an m*n matrix C - bool is_block_oob(int m, int n); - - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - static dim3 grid_dims(int m, int n); -}; - - - -/****************************************************************************** - * grid_raster (ColumnMajor specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (ColumnMajor specialization) - * - * Maps thread blocksin column-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - 
grid_raster_strategy::ColumnMajor> ///< Strategy for enumerating \p block_task within an input matrix -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - // blockDim.x is the fastest changing grid dim on current architectures - block_item_coords = make_int2( - BlockItemsX * blockIdx.y, - BlockItemsY * blockIdx.x); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - // ColumnMajor never rasterizes fully out-of-bounds thread blocks - return false; - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // blockDim.x is the fastest changing grid dim on current architectures - return dim3( - (m + BlockItemsY - 1) / BlockItemsY, - (n + BlockItemsX - 1) / BlockItemsX); - } -}; - - - -/****************************************************************************** - * grid_raster (RowMajor specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (RowMajor specialization) - * - * Enumerates \p block_task in row-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t 
TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - grid_raster_strategy::RowMajor> ///< Strategy for enumerating \p block_task within an input matrix -{ - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - // blockDim.x is the fastest changing grid dim on current architectures - block_item_coords = make_int2( - BlockItemsX * blockIdx.x, - BlockItemsY * blockIdx.y); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - // RowMajor never rasterizes fully out-of-bounds thread blocks - return false; - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // blockDim.x is the fastest changing grid dim on current architectures - return dim3( - (n + BlockItemsX - 1) / BlockItemsX, - (m + BlockItemsY - 1) / BlockItemsY); - } - -}; - - - -/****************************************************************************** - * grid_raster (TiledCohort specialization) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (TiledCohort specialization) - * - * Enumerates \p block_task in column-major fashion across "cohort" tiles (where - * cohorts are CohortBlocksY high and CohortBlocksX wide), and enumerates cohorts - * across the matrix in 
column-major fashion. - * - * Grid layout: - * - gridDim.y is the height of the grid in cohorts - * - gridDim.x is the width of the grid in cohorts multiplied by the number of - * thread blocks per cohort - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformA, ///< View transform enumerant for matrix A - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - TransformA, - TransformB, - grid_raster_strategy::TiledCohort> ///< Strategy for enumerating \p block_task within an input matrix -{ - enum - { - /// Height in thread blocks of a grid rasterization cohort - CohortBlocksY = 2, - - /// Width in thread blocks of a grid rasterization cohort - CohortBlocksX = 2, - - /// Number of thread blocks per cohort - BlocksPerCohort = CohortBlocksY * CohortBlocksX, - - /// Height in items of a grid rasterization cohort - CohortItemsY = CohortBlocksY * BlockItemsY, - - /// Width in items of a grid rasterization cohort - CohortItemsX = CohortBlocksX * BlockItemsX, - - }; - - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /// Thread block's base item coordinates (x, y) in matrix C - int2 block_item_coords; - - /// Constructor - inline __device__ - grid_raster() - { - int block_idx_cohort = blockIdx.x % BlocksPerCohort; - int2 cohort_coords_grid = make_int2( - blockIdx.x / BlocksPerCohort, - blockIdx.y); - - // Cohort is rastered in column-major order - int2 block_coords_cohort = make_int2( - block_idx_cohort / CohortBlocksY, - block_idx_cohort % CohortBlocksY); - - block_item_coords = make_int2( - ((cohort_coords_grid.x * CohortBlocksX) + block_coords_cohort.x) * BlockItemsX, - ((cohort_coords_grid.y * CohortBlocksY) + 
block_coords_cohort.y) * BlockItemsY); - } - - /// Whether the base \p block_item_coords are out-of-bounds for an m*n matrix C - inline __device__ - bool is_block_oob(int m, int n) - { - /// thread blocks within the cohort may be fully out-of-bounds - return (block_item_coords.x >= n) || (block_item_coords.y >= m); - } - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /// Compute the kernel grid extents (in thread blocks) for consuming an m*n matrix C - inline __host__ __device__ - static dim3 grid_dims(int m, int n) - { - // Extents of C matrix in cohorts - int2 grid_cohort_dims = make_int2( - (n + CohortItemsX - 1) / CohortItemsX, - (m + CohortItemsY - 1) / CohortItemsY); - - return dim3( - grid_cohort_dims.x * BlocksPerCohort, // gridDim.x is width of grid in cohorts * size of cohort in blocks - grid_cohort_dims.y, // gridDim.y is height of grid in cohorts - 1); // gridDim.z is reserved for optional k-splitting - } -}; - - -/****************************************************************************** - * grid_raster (Default specializations) - ******************************************************************************/ - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default N* specialization) - * - * Maps thread blocksin column-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX, ///< Width in columns of a block-wide tile in matrix C - matrix_transform_t::kind_t TransformB> ///< View transform enumerant for matrix B -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::NonTranspose, ///< View transform enumerant for matrix A - TransformB, - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - 
matrix_transform_t::NonTranspose, - TransformB, - grid_raster_strategy::ColumnMajor> -{}; - - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default TT specialization) - * - * Maps thread blocksin row-major fashion - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX> ///< Width in columns of a block-wide tile in matrix C -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, ///< View transform enumerant for matrix A - matrix_transform_t::Transpose, ///< View transform enumerant for matrix B - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, - matrix_transform_t::Transpose, - grid_raster_strategy::RowMajor> -{}; - - -/** - * \brief Abstraction for enumerating \p block_task within an input matrix - * (Default TN specialization) - * - * Maps thread blocksin blocked cohorts - */ -template < - int BlockItemsY, ///< Height in rows of a block-wide tile in matrix C - int BlockItemsX> ///< Width in columns of a block-wide tile in matrix C -struct grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, ///< View transform enumerant for matrix A - matrix_transform_t::NonTranspose, ///< View transform enumerant for matrix B - grid_raster_strategy::Default> ///< Strategy for enumerating \p block_task within an input matrix -: - grid_raster< - BlockItemsY, - BlockItemsX, - matrix_transform_t::Transpose, - matrix_transform_t::NonTranspose, - grid_raster_strategy::TiledCohort> -{}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/hgemm_global_tile.h b/cutlass/gemm/hgemm_global_tile.h new file mode 100644 index 0000000000..f14dbb311a --- /dev/null +++ b/cutlass/gemm/hgemm_global_tile.h @@ -0,0 +1,90 @@ 
+/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Tile traits used to construct global tile iterator for HGEMM. This is intended to + partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate + memory accesses larger than 16 bits.
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmCrosswiseGlobalTileTraits : public GemmGlobalTileTraits< + // Which GEMM operand? + kOperand_, + // The layout. + kLayout_, + // The scalar. + Scalar_, + // The tile. + Tile_, + // The threads. + Threads_, + // The number of scalars per LDG/STG. + kAccessSize_> { + /// The base class. + typedef GemmGlobalTileTraits Base; + /// The thread arrangement, taken from the base traits. + typedef typename Base::Threads Threads; + /// The per-thread scale factors applied to a thread's (H, W) index in ThreadOffset. + typedef Shape<1, 2, Base::Tile::kC> ThreadsDelta; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The number of iterations needed to load/store the tile. + typedef Shape + Iterations; + /// Computes this thread's (H, W) offset from threadIdx.x; the first and last coordinates are 0. + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_multiply_add.h b/cutlass/gemm/hgemm_multiply_add.h new file mode 100644 index 0000000000..ebbdd06e87 --- /dev/null +++ b/cutlass/gemm/hgemm_multiply_add.h @@ -0,0 +1,104 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Specialization implementing multiply-add operation on half-precision floating point + fragments.
+*/ +#pragma once + +#include + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Thread-level matrix multiply-add on half fragments, computed with packed __half2 FMAs. +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 2, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. + typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef half ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef half ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef half ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// H and W must both be even because multiply_add walks the fragments in __half2 pairs. + static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size"); + static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size"); + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply-add: d = a*b + c. The body is compiled only for __CUDA_ARCH__ >= 530; otherwise d is left untouched. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { +#if defined(__CUDACC__) && __CUDA_ARCH__ >= 530 + // The inputs, viewed as packed __half2 pairs. + __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]); + __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]); + __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]); + + // The output, viewed as packed __half2 pairs. + __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]); + + for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) { + // The __half2 offsets of rows 2*j and 2*j+1 in the output fragment.
+ int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i; + int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i; + + // d[k0] = a[i] * b[j].H0_H0 + c[k0] (low half of b[j] duplicated into both lanes). + d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]); + // d[k1] = a[i] * b[j].H1_H1 + c[k1] (high half of b[j] duplicated into both lanes). + d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]); + } + } +#endif + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_swizzle.h b/cutlass/gemm/hgemm_swizzle.h new file mode 100644 index 0000000000..ebec0d4680 --- /dev/null +++ b/cutlass/gemm/hgemm_swizzle.h @@ -0,0 +1,94 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in + shared memory for multiplicands. +*/ +#pragma once + +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmSwizzle { + /// The global iterator. + typedef GlobalIterator_ GlobalIterator; + /// The source fragment. + typedef typename GlobalIterator::Fragment Fragment; + /// The shape of the source fragment. + typedef typename GlobalIterator::FragmentShape FragmentShape; + + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + /// The src/dst must be half fragments. + static_assert((platform::is_same::value), "Works on half"); + + /// Requires FragmentShape::kH == 2 and a kWc count of exactly 2 (stricter than "a multiple of 2"). + static_assert(FragmentShape::kH == 2 && ShapeCount::kWc == 2, "Not multiple of 2"); + + /// Ctor. + CUTLASS_DEVICE HgemmSwizzle() {} + + /// Transform a fragment: writes the transposed contents of src into dst. + CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) { + // Expose src/dst as int arrays, so each int carries two 16-bit elements. + int const* src_int = reinterpret_cast(&src[0]); + int* dst_int = reinterpret_cast(&dst[0]); + + // Transpose the data: prmt 0x5410 packs the low 16 bits of a0 and a1, 0x7632 packs their high 16 bits.
+ for (int d = 0; d < FragmentShape::kD; ++d) { + // The indices to read two consecutive "rows". + int const i0 = 2 * d + 0; + int const i1 = 2 * d + 1; + + int a0 = src_int[i0]; + int a1 = src_int[i1]; + + int b0, b1; + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1)); + + // The indices to store with "strides": b0 goes to the first half of dst, b1 to the second half. + int const j0 = 0 * (ShapeCount::kDhw / 2) + d; + int const j1 = 1 * (ShapeCount::kDhw / 2) + d; + + dst_int[j0] = b0; + dst_int[j1] = b1; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/hgemm_traits.h b/cutlass/gemm/hgemm_traits.h new file mode 100644 index 0000000000..78e5bac5b7 --- /dev/null +++ b/cutlass/gemm/hgemm_traits.h @@ -0,0 +1,391 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of half-precision GEMM computation. +*/ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 2> +struct HgemmConfig + : public GemmConfig< + /// The scalar type for A. + half, + /// The scalar type for B. + half, + /// The scalar type for C. + half, + /// The scalar type for D. + half, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, half, half, half>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. 
+ kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 8, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 8, + /// The number of scalars per LDG for C and STG for D. + 2, + /// The number of scalars per STS for D. + 8, + /// The number of scalars per LDS for D. + 2, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTransformerA {}; + +template +struct HgemmTransformerA { + typedef Convert Transformer; +}; + +template +struct HgemmTransformerA { + typedef HgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTransformerB {}; + +template +struct HgemmTransformerB { + typedef Convert Transformer; +}; + +template +struct HgemmTransformerB { + typedef HgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef HgemmCrosswiseGlobalTileTraits< + GemmOperand::kA, + // The layout. + MatrixLayout::kRowMajor, + // The pointer. + half const, + // The tile has size MxK in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K ) x K (the traits may reorganize). 
+ Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc) + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^T. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer. + half, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32(the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + 2, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for A^T. + typedef GemmSharedLoadTileATraits< + // The pointer. + half const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + 8, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct HgemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef HgemmCrosswiseGlobalTileTraits< + GemmOperand::kB, + // The layout. 
+ MatrixLayout::kColumnMajor, + // The pointer. + half const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc) + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreWithSkewTileAbTraits< + // The pointer. + half, + // The tile has size KxN in GEMM's terminology. + Shape, + // The threads are distributed as (threads / K) x K (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + 2, + // The skew to avoid bank conflicts added in the tile W dimension. + 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> + SharedStoreTileTraits; + + /// The traits class to build the iterator to load from shared memory for B^N. + typedef GemmSharedLoadTileBTraits< + // The pointer. + half const, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The number of threads per warp. + typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, + // The shape of the FMA instruction. + typename GemmConfig_::InstructionShape, + // The number of stages. + GemmConfig_::kStages, + // The number of scalars per LDS. + 8, + // The skew. + SharedStoreTileTraits::kSkew> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The functor to do the math in the epilogue. 
+ typename EpilogueFunctor_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_ = 2, + /// The index. + typename Index_ = int> +struct HgemmTraitsHelper { + /// The HGEMM config. + typedef HgemmConfig + GemmConfig; + /// The GEMM config for A. + typedef HgemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The GEMM config for B. + typedef HgemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. + typedef typename HgemmTransformerA::Transformer GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + // The default transformer for B. + typedef typename HgemmTransformerB::Transformer GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream SharedLoadStreamB; + + /// The functor to do the multiply-add in the main loop. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. 
+ typedef ClearAccumulators ClearAccumulators; + + /// The traits class for the epilogue. + typedef SimplifiedGemmEpilogueTraits GemmEpilogueTraits; + /// The epilogue. + typedef GemmEpilogue Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 128, 128>, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 16>, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_ = 2, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_ = 2, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = HgemmTraitsHelper > +struct HgemmTraits : public GemmTraits< + // The config. + typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. 
+ typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/identity_block_swizzle.h b/cutlass/gemm/identity_block_swizzle.h new file mode 100644 index 0000000000..e1bdb2e003 --- /dev/null +++ b/cutlass/gemm/identity_block_swizzle.h @@ -0,0 +1,48 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines functors for mapping blockIdx to partitions of the GEMM computation. + + Currently, we only implement an identity mapping. +*/ +#pragma once + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +struct IdentityBlockSwizzle { + /// Ctor. + CUTLASS_DEVICE IdentityBlockSwizzle() {} + + /// Swizzle the block index. + CUTLASS_DEVICE dim3 swizzle() { return blockIdx; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_epilogue.h b/cutlass/gemm/igemm_epilogue.h new file mode 100644 index 0000000000..0d69980316 --- /dev/null +++ b/cutlass/gemm/igemm_epilogue.h @@ -0,0 +1,320 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and + floating-point output matrix formats. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmFloatToInt8Converter { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. + typedef Fragment OutputFragment; + + // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4. 
+ static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmFloatToInt8Converter() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + // The inputs. + float4 const* src_f4 = reinterpret_cast(&src[0]); + // The outputs. + int* dst_int = reinterpret_cast(&dst[0]); + + // Iterate over the floats and pack them together to produce ints. + for (int i = 0; i < kElements_ / 4; ++i) { + // Read the float4. + float4 f4 = src_f4[i]; + + // Clamp the 4 elements of the floats to the [-128, +127] range. + float x = fmaxf(-128.f, fminf(127.f, f4.x)); + float y = fmaxf(-128.f, fminf(127.f, f4.y)); + float z = fmaxf(-128.f, fminf(127.f, f4.z)); + float w = fmaxf(-128.f, fminf(127.f, f4.w)); + + // Convert to integers. + int ix = (int)x; + int iy = (int)y; + int iz = (int)z; + int iw = (int)w; + + // Extract the lower bytes to build an int32 with 4 int8. + asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy)); + asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw)); + asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz)); + + // Store the int. + dst_int[i] = ix; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmGlobalStoreTransformer { + typedef Convert, OutputFragment_> Transformer; +}; + +template +struct IgemmGlobalStoreTransformer > { + typedef IgemmFloatToInt8Converter Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmInt8ToFloatConverter { + /// The input fragment. + typedef Fragment InputFragment; + /// The output fragment. 
+ typedef Fragment OutputFragment; + + // We are unpacking 4 int8s from int32. + static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmInt8ToFloatConverter() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) { + transform(src, 0, dst); + } + + /// Transform a fragment. + template + CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) { + // The inputs. + int const* src_int = reinterpret_cast(&src[0]); + // The outputs. + float4* dst_f4 = reinterpret_cast(&dst[0]); + + // Iterate over the int8 and unpack them together to produce floats. + for (int i = 0; i < kElements_ / 4; ++i) { + // Read the int. + int ix, iy, iz, iw = src_int[i]; + + // Extract the 4 bytes. + asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw)); + asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw)); + + // The floats. + float fx, fy, fz, fw; + + // Convert to floats (make sure we generate I2F.F32.S8). + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz)); + asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw)); + + // Store the float4. 
+ dst_f4[i] = make_float4(fx, fy, fz, fw); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmGlobalLoadTransformer { + typedef Convert > Transformer; +}; + +template +struct IgemmGlobalLoadTransformer, float> { + typedef IgemmInt8ToFloatConverter Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmSharedStoreTransformer { + typedef Convert, OutputFragment_> Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogueTraitsHelper + : public GemmEpilogueTraitsHelper { + /// The base class. + typedef GemmEpilogueTraitsHelper Base; + /// The config. + typedef IgemmConfig_ IgemmConfig; + + /// The scalar type of the epilogue. + typedef typename Base::Scalar Scalar; + /// The iterations. + typedef typename Base::Iterations Iterations; + /// The iterations strides. + typedef typename Base::Delta Delta; + + /// The traits class for the iterator. + typedef typename Base::GlobalLoadTileTraits GlobalLoadTileTraits; + /// The global iterator to load C. + typedef GemmGlobalIteratorCd GlobalLoadIteratorC; + /// The fragment that needs to be produced by the load iterator. + typedef typename GlobalLoadIteratorC::Fragment GlobalFragmentC; + /// The transformer from loaded data to math fragment. + typedef + typename IgemmGlobalLoadTransformer::Transformer GlobalTransformerC; + + /// The traits class for the iterator. + typedef typename Base::GlobalStoreTileTraits GlobalStoreTileTraits; + /// The global iterator to store D. + typedef GemmGlobalIteratorCd GlobalStoreIteratorD; + /// The fragment that needs to be passed to that store iterator. + typedef typename GlobalStoreIteratorD::Fragment GlobalFragmentD; + /// The transformer for D used with the global store iterator. 
+ typedef + typename IgemmGlobalStoreTransformer::Transformer GlobalTransformerD; + + /// The traits class for the shared iterator to store D to shared memory. + typedef typename Base::SharedStoreTileTraits SharedStoreTileTraits; + /// The shared iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + /// The fragment that needs to be passed to that store iterator. + typedef typename SharedStoreIteratorD::Fragment SharedStoreFragmentD; + /// The transformer from accumulators to shared memory fragments. + typedef typename IgemmSharedStoreTransformer::Transformer + SharedStoreTransformerD; + /// The traits class for the shared iterator to load D from shared memory. + typedef typename Base::SharedLoadTileTraits SharedLoadTileTraits; + /// The shared iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The config. + typename IgemmConfig_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The index. + typename Index_ = int, + /// The helper class to assemble the traits. + typename Helper_ = IgemmEpilogueTraitsHelper > +struct IgemmEpilogueTraits : public GemmEpilogueTraits< + // The output tile. + typename IgemmConfig_::OutputTile, + // The accumulators. + typename IgemmConfig_::Accumulators, + // The global iterator for C. + typename Helper_::GlobalLoadIteratorC, + // The transformer for C. + typename Helper_::GlobalTransformerC, + // The transformer for D. + typename Helper_::GlobalTransformerD, + // The global iterator for D. + typename Helper_::GlobalStoreIteratorD, + // The iterator to store D to shared memory. + typename Helper_::SharedStoreIteratorD, + // The shared store transformer for D. + typename Helper_::SharedStoreTransformerD, + // The iterator to load D from shared memory. 
+ typename Helper_::SharedLoadIteratorD, + // The iterations. + typename Helper_::Iterations, + // The strides between iterations. + typename Helper_::Delta, + // The functor to be used in the epilogue. + EpilogueFunctor_, + // The index. + Index_> { + /// Do we output in int8? + static bool const kInt8Output = + platform::is_same::value != 0; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogue : public GemmEpilogue { + /// The base class. + typedef GemmEpilogue Base; + + /// Ctor. + CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_, + typename Base::SharedStorage& shared_storage_, + typename Base::Index m_, + typename Base::Index n_) + : Base(params_, shared_storage_, m_, n_) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogue : public GemmEpilogue { + /// The base class. + typedef GemmEpilogue Base; + + /// Ctor. + CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_, + typename Base::SharedStorage& shared_storage_, + typename Base::Index m_, + typename Base::Index n_) + : Base(params_, shared_storage_, m_, n_) {} +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_global_tile.h b/cutlass/gemm/igemm_global_tile.h new file mode 100644 index 0000000000..6993c631f3 --- /dev/null +++ b/cutlass/gemm/igemm_global_tile.h @@ -0,0 +1,95 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements tile iterators to partition the thread block tile into 2D subtiles and + efficiently load each. Applies permute transformation to construct 'interleaved K-strided' + data layout in which 4-element dot products from the same K index are arranged in consecutive + locations within shared memory. + + Supports efficient loads from shared memory to target the DP4A instruction. 
+*/ +#pragma once + +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmContiguousGlobalTileTraits : public GemmGlobalTileTraits< + // Which GEMM operand? + kOperand_, + // The layout. + kLayout_, + // The scalar. + Scalar_, + // The tile. + Tile_, + // The threads. + Threads_, + // The number of scalars per LDG/STG. + kAccessSize_> { + /// The base class. + typedef GemmGlobalTileTraits Base; + /// The threads. + typedef typename Base::Threads Threads; + /// The strides in each dimension between different loads/stores. + typedef Shape Delta; + /// The number of iterations needed to load/store the tile. + typedef Shape + Iterations; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH; + int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; + + public: + /// The threads strides. + typedef Shape<1, 4, Base::Tile::kC> ThreadsDelta; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_multiply_add.h b/cutlass/gemm/igemm_multiply_add.h new file mode 100644 index 0000000000..5a8baec533 --- /dev/null +++ b/cutlass/gemm/igemm_multiply_add.h @@ -0,0 +1,89 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements matrix multiply accumulate operation of 8-bit integer data using DP4A + instruction. 
+*/ +#pragma once + +#include + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add operation within a thread +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<4, 1, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp. + typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef int8_t ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef int8_t ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef int ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply : d = a*b + c. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + // The inputs. 
+ int const* a_int = reinterpret_cast(&a[0]); + int const* b_int = reinterpret_cast(&b[0]); + + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + asm volatile("dp4a.s32.s32 %0, %1, %2, %3;" + : "=r"(d[j * AccumulatorsPerThread::kW + i]) + : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i])); + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_swizzle.h b/cutlass/gemm/igemm_swizzle.h new file mode 100644 index 0000000000..77cf7118df --- /dev/null +++ b/cutlass/gemm/igemm_swizzle.h @@ -0,0 +1,115 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Transposes a fragment of data containing packed 8-bit integer elements. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmSwizzle { + /// The global iterator. + typedef GlobalIterator_ GlobalIterator; + /// The source fragment. + typedef typename GlobalIterator::Fragment Fragment; + /// The shape of the source fragment. + typedef typename GlobalIterator::FragmentShape FragmentShape; + + /// The source fragment. + typedef Fragment InputFragment; + /// The destination fragment. + typedef Fragment OutputFragment; + + /// The src/dst must be int8 fragments. + static_assert((platform::is_same::value), "Works on int8"); + + /// The number of elements must be a multiple of 4. + static_assert(FragmentShape::kH % 4 == 0 && ShapeCount::kWc % 4 == 0, + "Not multiple of 4"); + + /// Ctor. + CUTLASS_DEVICE IgemmSwizzle() {} + + /// Transform a fragment. + CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) { + // Expose src/dst as int arrays. + int const* src_int = reinterpret_cast(&src[0]); + int* dst_int = reinterpret_cast(&dst[0]); + + // Transpose the data. 
+ for (int d = 0; d < FragmentShape::kD; ++d) { + for (int h = 0; h < FragmentShape::kH / 4; ++h) { + for (int w = 0; w < ShapeCount::kWc / 4; ++w) { + int const i0 = d * (ShapeCount::kHwc / 4) + + (4 * h + 0) * (ShapeCount::kWc / 4) + w; + int const i1 = d * (ShapeCount::kHwc / 4) + + (4 * h + 1) * (ShapeCount::kWc / 4) + w; + int const i2 = d * (ShapeCount::kHwc / 4) + + (4 * h + 2) * (ShapeCount::kWc / 4) + w; + int const i3 = d * (ShapeCount::kHwc / 4) + + (4 * h + 3) * (ShapeCount::kWc / 4) + w; + + int a0 = src_int[i0]; + int a1 = src_int[i1]; + int a2 = src_int[i2]; + int a3 = src_int[i3]; + + int b0, b1, b2, b3, c0; + asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0)); + + asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1)); + asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3)); + asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0)); + + dst_int[i0] = b0; + dst_int[i1] = b1; + dst_int[i2] = b2; + dst_int[i3] = b3; + } + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/igemm_traits.h b/cutlass/gemm/igemm_traits.h new file mode 100644 index 0000000000..9e8b936541 --- /dev/null +++ b/cutlass/gemm/igemm_traits.h @@ 
-0,0 +1,393 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed + to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output + formats vary.
+*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The output type. + typename ScalarD_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_> +struct IgemmConfig + : public GemmConfig< + /// The scalar type for A. + int8_t, + /// The scalar type for B. + int8_t, + /// The scalar type for C. + ScalarD_, + /// The scalar type for D. + ScalarD_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, int8_t, int8_t, int>, + /// The number of scalars per LDG for A. + 4, + /// The number of scalars per STS for A. + 4, + /// The number of scalars per LDS for A. + 16, + /// The number of scalars per LDG for B. + 4, + /// The number of scalars per STS for B. + 4, + /// The number of scalars per LDS for B. + 16, + /// The number of scalars per LDG for C and STG for D. + 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmConfig + : public GemmConfig< + /// The scalar type for A. + int8_t, + /// The scalar type for B. + int8_t, + /// The scalar type for C. + int8_t, + /// The scalar type for D. + int8_t, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, int8_t, int8_t, int>, + /// The number of scalars per LDG for A. + 4, + /// The number of scalars per STS for A. + 4, + /// The number of scalars per LDS for A. + 16, + /// The number of scalars per LDG for B. 
+ 4, + /// The number of scalars per STS for B. + 4, + /// The number of scalars per LDS for B. + 16, + /// The number of scalars per LDG for C and STG for D. + 4, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 4, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The number of scalars per LDG/STS/LDS for A. + static int const kScalarsPerStsA = 16; + + /// The traits class to build the iterator to load data from global memory for A^N. + typedef IgemmContiguousGlobalTileTraits< + GemmOperand::kA, + // The layout. + MatrixLayout::kColumnMajor, + // The pointer is int8_t const. + int8_t const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + 4> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer is int8_t. + int8_t, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc).
+ kScalarsPerStsA> + SharedStoreTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The number of scalars per LDG/STS/LDS for B. + static int const kScalarsPerStsB = 16; + + /// The traits class to build the iterator to load data from global memory for B^T. + typedef IgemmContiguousGlobalTileTraits< + GemmOperand::kB, + // The layout. + MatrixLayout::kRowMajor, + // The pointer is int8_t const. + int8_t const, + // The tile has size KxN in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + 4> + GlobalTileTraits; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer is int8_t. + int8_t, + // The tile has size KxM in GEMM's terminology. + Shape, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc).
+ kScalarsPerStsB> + SharedStoreTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTransformerA {}; + +template +struct IgemmTransformerA { + typedef Copy Transformer; +}; + +template +struct IgemmTransformerA { + typedef IgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmTransformerB {}; + +template +struct IgemmTransformerB { + typedef Copy Transformer; +}; + +template +struct IgemmTransformerB { + typedef IgemmSwizzle Transformer; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The output type. + typename ScalarD_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The index. + typename Index_ = int> +struct IgemmTraitsHelper { + /// The IGEMM config. + typedef IgemmConfig GemmConfig; + /// The GEMM config for A. + typedef IgemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The GEMM config for B. + typedef IgemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. + typedef typename IgemmTransformerA::Transformer GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. 
+ typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + // The default transformer for B. + typedef typename IgemmTransformerB::Transformer GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream > + SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream > + SharedLoadStreamB; + + /// The multiply-add functor. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. + typedef ClearAccumulators ClearAccumulators; + + /// The epilogue. + typedef IgemmEpilogue > Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct IgemmEpilogueScalar { + typedef float Scalar; +}; + +template <> +struct IgemmEpilogueScalar { + typedef int Scalar; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<32, 128, 128>, + /// The output type. + typename ScalarD_ = int, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling::Scalar>, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<32, 8, 8>, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = IgemmTraitsHelper > +struct IgemmTraits : public GemmTraits< + // The config. 
+ typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/k_split_control.h b/cutlass/gemm/k_split_control.h deleted file mode 100644 index 7a332b0bed..0000000000 --- a/cutlass/gemm/k_split_control.h +++ /dev/null @@ -1,310 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Abstraction for coordinating inter-block k-splitting - */ - -#include - -#include "../util/util.h" - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * Storage and initialization - ******************************************************************************/ - -enum -{ - NumFlagsSplitK = 4096 -}; - - -/** - * Global K-split semaphore flags - * - * TODO: use demand-allocated storage to provide copies for concurrent streams - */ -__device__ int d_flags_split_k[NumFlagsSplitK]; - - -/** - * Preparation kernel for zero-initializing semaphore flags - */ -__global__ void prepare_kernel(int *d_flags_split_k) -{ - int tid = (blockIdx.x * blockDim.x) + threadIdx.x; - if (tid < NumFlagsSplitK) - d_flags_split_k[tid] = 0; -} - - -/****************************************************************************** - * k_split_control - ******************************************************************************/ - -/** - * \brief Abstraction for coordinating inter-block k-splitting - */ -struct k_split_control -{ - 
/// Extent of a thread block's partition along the GEMM K-axis - int split_k; - - /// Whether or not to use a semaphore for inter-block k-splitting. - bool use_semaphore; - - /// Pointer to semaphore - int *d_flags; - - - - //------------------------------------------------------------------------- - // Device API - //------------------------------------------------------------------------- - - /** - * Return the thread block's starting coordinate (k) within the - * multiplicand matrices - */ - inline __device__ - int block_begin_item_k() - { - return blockIdx.z * split_k; - } - - - /** - * Return the thread block's ending coordinate (k) within the multiplicand - * matrices (one-past) - */ - inline __device__ - int block_end_item_k(int dim_k) - { - int next_start_k = block_begin_item_k() + split_k; - return __NV_STD_MIN(next_start_k, dim_k); - } - - - /** - * Whether the thread block is a secondary accumulator in an inter-block - * k-splitting scheme - */ - inline __device__ - bool is_secondary_accumulator() - { - return (blockIdx.z > 0); - } - - - /** - * Wait for predecessor thread block(s) to produce the exclusive - * partial-sums for this block-wide tile - */ - inline __device__ - void wait() - { - // Wait on semaphore - if ((use_semaphore) && (blockIdx.z > 0)) - { - if (threadIdx.x == 0) - { - int bid = (blockIdx.y * gridDim.x) + blockIdx.x; - int hash = bid % NumFlagsSplitK; - int found; - int looking = blockIdx.z; - while (true) - { - asm volatile ("ld.global.cg.u32 %0, [%1];\n" : "=r"(found) : "l"(d_flags + hash)); - - if (found == looking) - break; - - /// Fence to keep load from being hoisted from the loop - __syncwarp(0x00000001); - } - } - - __syncthreads(); - } - } - - - /** - * Signal the successor thread_block(s) that the inclusive partial-sums - * from this block-wide tile are available - */ - inline __device__ - void signal() - { - if (use_semaphore) - { - __syncthreads(); - - if (threadIdx.x == 0) - { - int bid = (blockIdx.y * gridDim.x) + 
blockIdx.x; - int hash = bid % NumFlagsSplitK; - int val = blockIdx.z + 1; - - asm volatile ("st.global.cg.u32 [%0], %1;\n" : : "l"(d_flags + hash), "r"(val)); - } - } - } - - - //------------------------------------------------------------------------- - // Grid launch API - //------------------------------------------------------------------------- - - /** - * Constructor - */ - inline - k_split_control( - int *d_flags, - int sm_count, - int max_sm_occupancy, - int dim_k, - int block_tile_items_k, - dim3 block_dims, - dim3 &grid_dims) ///< [in,out] - : - d_flags(d_flags), - split_k(dim_k) - { - // Compute wave efficiency - float wave_efficiency = get_wave_efficiency( - sm_count, - max_sm_occupancy, - block_dims, - grid_dims); - - // Update split-k if wave efficiency is less than some threshold - if (wave_efficiency < 0.9) - { - int num_threadblocks = grid_dims.x * grid_dims.y * grid_dims.z; - - // Ideal number of thread blocks in grid - int ideal_threadblocks = lcm(sm_count, num_threadblocks); - - // Desired number of partitions to split K-axis into - int num_partitions = ideal_threadblocks / num_threadblocks; - - // Compute new k-split share - int new_split_k = (dim_k + num_partitions - 1) / num_partitions; - - // Round split_k share to the nearest block_task_policy_t::BlockItemsK - new_split_k = round_nearest(new_split_k, block_tile_items_k); - - // Recompute k-splitting factor with new_split_k - num_partitions = (dim_k + new_split_k - 1) / new_split_k; - - // Update grid dims and k if we meet the minimum number of iterations worth the overhead of splitting - int min_iterations_k = 8; - - if (((new_split_k / block_tile_items_k) > min_iterations_k) && // We're going to go through at least this many k iterations - (sm_count * max_sm_occupancy < NumFlagsSplitK)) // We have enough semaphore flags allocated - { - grid_dims.z = num_partitions; - split_k = new_split_k; - } - } - - use_semaphore = (grid_dims.z > 1); - } - - - /** - * Initializer - */ - cudaError_t 
prepare( - cudaStream_t stream, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console if DEBUG is defined. Default is \p false. - - { - cudaError error = cudaSuccess; - - if (use_semaphore) - { - int block_threads = 128; - int grid_dims = (NumFlagsSplitK + block_threads - 1) / block_threads; - - prepare_kernel<<>>(d_flags); - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(error = cudaPeekAtLastError())) - return error; - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(error = cudaStreamSynchronize(stream)))) - return error; - } - - return error; - } - - - /** - * Compute the efficiency of dispatch wave quantization - */ - float get_wave_efficiency( - int sm_count, - int max_sm_occupancy, - dim3 block_dims, - dim3 grid_dims) - { - // Heuristic for how many warps are needed to saturate an SM for a given - // multiply-accumulate genre. (NB: We could make this more rigorous by - // specializing on data types and SM width) - int saturating_warps_per_sm = 16; - - int num_threadblocks = grid_dims.x * grid_dims.y * grid_dims.z; - int threads_per_threadblock = block_dims.x * block_dims.y; - int warps_per_threadblock = threads_per_threadblock / 32; - int saturating_threadblocks_per_sm = (saturating_warps_per_sm + warps_per_threadblock - 1) / warps_per_threadblock; - - int saturating_residency = sm_count * saturating_threadblocks_per_sm; - int full_waves = num_threadblocks / saturating_residency; - int remainder_threadblocks = num_threadblocks % saturating_residency; - int total_waves = (remainder_threadblocks == 0) ? 
full_waves : full_waves + 1; - - float last_wave_saturating_efficiency = float(remainder_threadblocks) / saturating_residency; - - return (float(full_waves) + last_wave_saturating_efficiency) / total_waves; - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/linear_scaling.h b/cutlass/gemm/linear_scaling.h new file mode 100644 index 0000000000..05afaea197 --- /dev/null +++ b/cutlass/gemm/linear_scaling.h @@ -0,0 +1,86 @@ + +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements the BLAS linear scaling function alpha*AB + beta*C +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Functor to compute linear combination of fragments +template > +struct LinearScaling { + // The scalar. + typedef Scalar_ Scalar; + // The adapter. + typedef FragmentMultiplyAdd_ FragmentMultiplyAdd; + + /// The parameters. + struct Params { + /// The alpha/beta scaling params. + Scalar alpha, beta; + + /// Initialize the parameters. + template + CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) { + alpha = desc.alpha; + beta = desc.beta; + return 0; + } + }; + + /// Ctor. + CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {} + + /// Evaluate the functor. + template + CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_& output) { + FragmentMultiplyAdd mad; + mad.multiply(alpha, accum, output); + } + + /// Evaluate the functor. + template + CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_ const& old, Fragment_& output) { + FragmentMultiplyAdd mad; + Fragment_ tmp; + mad.multiply(beta, old, tmp); + mad.multiply_add(alpha, accum, tmp, output); + } + + /// The alpha/beta scaling factors.
+ Scalar alpha, beta; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/sgemm_traits.h b/cutlass/gemm/sgemm_traits.h new file mode 100644 index 0000000000..66b7677486 --- /dev/null +++ b/cutlass/gemm/sgemm_traits.h @@ -0,0 +1,127 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of single-precision GEMM. +*/ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 1> +struct SgemmConfig + : public GemmConfig< + /// The scalar type for A. + float, + /// The scalar type for B. + float, + /// The scalar type for C. + float, + /// The scalar type for D. + float, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + ThreadMultiplyAdd, float, float, float>, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 4, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 4, + /// The number of scalars per LDG for C and STG for D.
+ 1, + /// The number of scalars per STS for D. + 4, + /// The number of scalars per LDS for D. + 1, + /// The number of stages in shared memory. + 2> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_ = Shape<8, 128, 128>, + /// The functor to use in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The number of accumulators per thread. + typename AccumulatorsPerThread_ = Shape<8, 8, 8>, + /// The number of floats loaded in one LDG for A. + int kScalarsPerLdgA_ = 1, + /// The number of floats loaded in one LDG for B. + int kScalarsPerLdgB_ = 1, + /// The index. + typename Index_ = int, + /// The SGEMM config. + typename GemmConfig_ = + SgemmConfig, + /// The traits class for the epilogue. + typename GemmEpilogueTraits_ = + SimplifiedGemmEpilogueTraits > +struct SgemmTraits : public SimplifiedGemmTraits< + // The layout for A. + kLayoutA_, + // The layout for B. + kLayoutB_, + // The config. + GemmConfig_, + // The epilogue. + GemmEpilogue, + // The index. + Index_> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/thread_accumulator.h b/cutlass/gemm/thread_accumulator.h deleted file mode 100644 index 462e18943f..0000000000 --- a/cutlass/gemm/thread_accumulator.h +++ /dev/null @@ -1,469 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Thread-level multiply-accumulate abstraction - */ - -#include "../util/util.h" -#include "dp_accummulate.h" - - -namespace cutlass { -namespace gemm { - - -/****************************************************************************** - * thread_accumulator (generic specialization) - ******************************************************************************/ - -/** - * \brief Thread-level multiply-accumulate abstraction (generic specialization) - * - * The thread_accumulator class maintains a MxN tile of accumulators in - * registers to which MxNxK matrix products of two thread tiles A (MxK) - * and B (KxN) can be added, where: - * M = ThreadItemsY - * N = ThreadItemsX - * K = sizeof(dp_vector_t) / sizeof(value_t). - * - * In order to leverage architecture-specific "dot-product accumulate" ISA - * operations, K is dictated by the thread_accumulator class in the form of - * the member-type dp_vector_t, which defines a K-component vector of value_t. - * The multiplicand inputs A and B are provided as arrays of dp_vector_t having - * extents ThreadItemsY and ThreadItemsX, respectively. (In the single - * component "dp1" scenario where dp_vector_t == value_t and thus K == 1, the - * multiplication is simply the outer product of two vectors.) - * - * The accumulators are zero-initialized in a two-phase process (construction + - * initialization) that requires shared storage in the form of the member-type - * scratch_storage_t during construction. (A single scratch_storage_t instance - * can be uniformly referenced across all threads in the block during - * construction *if* the block is synchronized between construction and - * initialization.) - * - * NB: This generic class is not directly constructible. Architecture- and - * algorithm-specific template specializations will provide the API - * functionality prescribed here. 
- */ -template < - int ThreadItemsY, ///< Height of thread tile in accum_t - int ThreadItemsX, ///< Width of thread tile in accum_t - typename value_t, ///< Multiplicand value type - typename accum_t, ///< Accumulator value type - int ACCUM_BYTES = ///< Size in bytes of accum_t - sizeof(accum_t), - arch_family_t::kind_t ArchFamily = ///< Architectural family enumerant - CUTLASS_ARCH_FAMILY> -struct thread_accumulator -{ -protected: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Specialized dot-product traits type - typedef dp_accummulate<value_t, accum_t> dp_accum_traits_t; - - -public: - - //------------------------------------------------------------------------- - // Member types - //------------------------------------------------------------------------- - - /// Dot-product vector type - typedef typename dp_accum_traits_t::dp_vector_t dp_vector_t; - - /// Scratch storage layout - struct scratch_storage_t {}; - - -protected: - - //------------------------------------------------------------------------- - // Data members - //------------------------------------------------------------------------- - - /// Thread's tile of accumulators - accum_t accumulators[ThreadItemsY][ThreadItemsX]; - - - //------------------------------------------------------------------------- - // Utility methods - //------------------------------------------------------------------------- - - /** - * Compute a multiply-add at accumulator coordinates (x, y) - */ - inline __device__ - void mad_xy( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX], - int x, - int y) - { - dp_accum_traits_t::mad( - accumulators[y][x], - tile_a[y], - tile_b[x], - accumulators[y][x]); - } - -public: - - //------------------------------------------------------------------------- - // Constructor API -
//------------------------------------------------------------------------- - - /// Constructor - inline __device__ - thread_accumulator( - scratch_storage_t &scratch) - {} - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * \brief Zero-initialize thread accumulators. - * - * If a common reference to a single block-wide shared instance of scratch_storage_t - * is used during construction, the block must be synchronized after construction - * but prior to the invocation of init(). - */ - inline __device__ - void init() - { - #pragma unroll - for (int y = 0; y < ThreadItemsY; ++y) { - #pragma unroll - for (int x = 0; x < ThreadItemsX; ++x) - { - accumulators[y][x] = accum_t(0); - } - } - } - - - /** - * Retrieve the accumulator at thread tile coordinates (x, y) - */ - inline __device__ - accum_t get(int x, int y) - { - // Accumulators are row-major - return accumulators[y][x]; - } - - - /** - * \brief Compute the product of tile_a and tile_b and add the result to - * the tile of accumulators.
- */ - inline __device__ - void multiply_accumulate( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX]) - { - // Simply traverse the accumulator tile in row-major order - #pragma unroll - for (int y = 0; y < ThreadItemsY; ++y) - { - #pragma unroll - for (int x = 0; x < ThreadItemsX; ++x) - { - mad_xy(tile_a, tile_b, x, y); - } - } - } -}; - - - - -/****************************************************************************** - * thread_accumulator (__half->__half specialization) - ******************************************************************************/ - -/** - * \brief Thread-level multiply-accumulate abstraction (__half->__half specialization) - * - * NB: Because we use the 2-item SIMD instruction HFMA2: - * - ThreadItemsX must be an even multiple of 2 - * - ThreadItemsY must be an even multiple of 2 - * - */ -template < - int ThreadItemsY, ///< Height in rows of thread tile in C - int ThreadItemsX, ///< Width in columns of thread tile in C - arch_family_t::kind_t ArchFamily> ///< Architectural family enumerant -struct thread_accumulator< - ThreadItemsY, - ThreadItemsX, - __half, ///< Multiplicand value type (matrices A and B) - __half, ///< Accumulator value type (matrix C and scalars) - 2, ///< Size in bytes of accum_t - ArchFamily> -{ -protected: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - /// Constants - enum - { - /// Height of thread tile in column-major uint32_t SIMD pairs along Y dimension - ThreadTilePairsY = divide_assert::value, - - /// Width of thread tile in column-major uint32_t SIMD pairs along X dimension - ThreadTilePairsX = ThreadItemsX, - - /// Number of SIMD pairs in thread's slice of block-wide tile multiplicand A - ThreadPairsA = divide_assert::value, - - /// Number of SIMD pairs in thread's slice of block-wide tile multiplicand B - ThreadPairsB =
divide_assert::value, - }; - -public: - - //------------------------------------------------------------------------- - // Member types - //------------------------------------------------------------------------- - - /// Dot-product vector type - typedef __half dp_vector_t; - - /// Scratch storage layout - struct scratch_storage_t {}; - - -private: - - //------------------------------------------------------------------------- - // Members - //------------------------------------------------------------------------- - - /// Thread's tile of C accumulator pairs (the uint32_t SIMD pairs are - /// column-major, the 2D tile layout is also column-major) - uint32_t accumulator_pairs[ThreadTilePairsX][ThreadTilePairsY]; - - - //------------------------------------------------------------------------- - // Utility methods - //------------------------------------------------------------------------- - - /** - * Compute an HFMA2 MAD - */ - inline __device__ void mad( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - - asm volatile ("fma.rn.f16x2 %0, %1, %2, %3;\n" - : "=r"(d) : "r"(a), "r"(b), "r"(c)); - } - - - /** - * Compute an HFMA2 MAD with replicated b.lo: - * d{hi} = a{hi} * b{lo} + c{hi}; - * d{lo} = a{lo} * b{lo} + c{lo}; - */ - inline __device__ void mad_replicate_low( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - // Replicate low halves of b - uint32_t replicate; - asm volatile ( - "{" - " .reg .b16 b_low,b_high;\n" - " mov.b32 {b_low,b_high}, %1;\n" - " mov.b32 %0, {b_low,b_low};\n" - "}" : "=r"(replicate) : "r"(b)); - - mad(d, a, replicate, c); - } - - - /** - * Compute an HFMA2 MAD with replicated b.hi: - * d{hi} = a{hi} * b{hi} + c{hi}; - * d{lo} = a{lo} * b{hi} + c{lo}; - */ - inline __device__ void mad_replicate_high( - uint32_t &d, - const uint32_t &a, - const uint32_t &b, - const uint32_t &c) - { - // Replicate high halves of b - uint32_t replicate; - asm volatile ( - "{" - " .reg 
.b16 b_low,b_high;\n" - " mov.b32 {b_low,b_high}, %1;\n" - " mov.b32 %0, {b_high,b_high};\n" - "}" : "=r"(replicate) : "r"(b)); - - mad(d, a, replicate, c); - } - - - /** - * Compute a multiply-add at accumulator SIMD-pair coordinates (pair_x, pair_y) - */ - inline __device__ - void mad_xy_even( - uint32_t (&pairs_tile_a)[ThreadPairsA], - uint32_t (&pairs_tile_b)[ThreadPairsB], - int pair_x, - int pair_y) - { - // Even column: use low half of the b pair - mad_replicate_low( - accumulator_pairs[pair_x][pair_y], - pairs_tile_a[pair_y], - pairs_tile_b[pair_x / 2], - accumulator_pairs[pair_x][pair_y]); - } - - - /** - * Compute a multiply-add at accumulator SIMD-pair coordinates (pair_x, pair_y) - */ - inline __device__ - void mad_xy_odd( - uint32_t (&pairs_tile_a)[ThreadPairsA], - uint32_t (&pairs_tile_b)[ThreadPairsB], - int pair_x, - int pair_y) - { - // Odd column: use high half of the b pair - mad_replicate_high( - accumulator_pairs[pair_x][pair_y], - pairs_tile_a[pair_y], - pairs_tile_b[pair_x / 2], - accumulator_pairs[pair_x][pair_y]); - } - - -public: - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor - inline __device__ - thread_accumulator( - scratch_storage_t &scratch) - {} - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * Zero-initialize thread accumulators. 
- */ - inline __device__ - void init() - { - #pragma unroll - for (int y = 0; y < ThreadTilePairsY; ++y) - { - #pragma unroll - for (int x = 0; x < ThreadTilePairsX; ++x) - { - accumulator_pairs[x][y] = 0; - } - } - } - - - /** - * Retrieve the accumulator at thread tile coordinates (x, y) - */ - inline __device__ - __half get(int x, int y) - { - // SIMD pairs are column-major - uint32_t pair = accumulator_pairs[x][y / 2]; - - return reinterpret_cast<__half (&)[2]>(pair)[y % 2]; - } - - - /** - * \brief Compute the product of pairs_tile_a and pairs_tile_b and add the result to - * the tile of accumulators. - */ - inline __device__ - void multiply_accumulate( - dp_vector_t (&tile_a)[ThreadItemsY], - dp_vector_t (&tile_b)[ThreadItemsX]) - { - typedef uint32_t pairs_tile_a_t[ThreadPairsA]; - typedef uint32_t pairs_tile_b_t[ThreadPairsB]; - - // Alias slices in pairs - pairs_tile_a_t &pairs_tile_a = reinterpret_cast(tile_a); - pairs_tile_b_t &pairs_tile_b = reinterpret_cast(tile_b); - - // Simply traverse the accumulator tile in column-major order - #pragma unroll - for (int x = 0; x < ThreadTilePairsX; ++x) - { - #pragma unroll - for (int y = 0; y < ThreadTilePairsY; ++y) - { - mad_xy_even(pairs_tile_a, pairs_tile_b, x, y); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/thread_multiply_add.h b/cutlass/gemm/thread_multiply_add.h new file mode 100644 index 0000000000..20dca15965 --- /dev/null +++ b/cutlass/gemm/thread_multiply_add.h @@ -0,0 +1,84 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Template implementing matrix multiply-add operations on fragments. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Template performing matrix multiply-add operation within a thread +template +struct ThreadMultiplyAdd { + /// The shape of the instruction. + typedef Shape<1, 1, 1, 1> InstructionShape; + /// The number of accumulators per thread. + typedef AccumulatorsPerThread_ AccumulatorsPerThread; + /// The number of threads per warp. + typedef ThreadsPerWarp_ ThreadsPerWarp; + /// The number of accumulators per warp.
+ typedef typename ShapeMul::Shape AccumulatorsPerWarp; + /// The type for A. + typedef ScalarA_ ScalarA; + /// The fragment for A. + typedef Fragment FragmentA; + /// The type for B. + typedef ScalarB_ ScalarB; + /// The fragment for B. + typedef Fragment FragmentB; + /// The type for C and D. + typedef ScalarC_ ScalarC; + /// The accumulators. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE ThreadMultiplyAdd() {} + + /// Multiply : d = a*b + c. + CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < AccumulatorsPerThread::kH; ++j) { + for (int i = 0; i < AccumulatorsPerThread::kW; ++i) { + d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i]; + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/wmma_accumulator.h b/cutlass/gemm/wmma_accumulator.h deleted file mode 100644 index dfd0f851b9..0000000000 --- a/cutlass/gemm/wmma_accumulator.h +++ /dev/null @@ -1,215 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Thread-level multiply-accumulate abstraction - * (Volta 4B accum_t specialization) - */ - -#include - -#include "../util/util.h" -#include "dp_accummulate.h" - -namespace cutlass { -namespace gemm { - - -/*! 
- *\brief matrix_layout to perform conversion between Cutlass types and WMMA types - */ -template -struct matrix_layout; - -/// Maps matrix_transform_t::NonTranspose to nvcuda::wmma::mem_col_major -template <> -struct matrix_layout -{ - /// Type tag in nvcuda::wmma namespace - typedef nvcuda::wmma::col_major tag; - - /// Column major layout - static const nvcuda::wmma::layout_t kind = nvcuda::wmma::mem_col_major; - - /// Cutlass matrix transform kind - static const matrix_transform_t::kind_t cutlass_kind = matrix_transform_t::NonTranspose; -}; - -/// Maps matrix_transform_t::NonTranspose to nvcuda::wmma::mem_row_major -template <> -struct matrix_layout -{ - /// Type tag in nvcuda::wmma namespace - typedef nvcuda::wmma::row_major tag; - - /// Column major layout - static const nvcuda::wmma::layout_t kind = nvcuda::wmma::mem_row_major; - - /// Cutlass matrix transform kind - static const matrix_transform_t::kind_t cutlass_kind = matrix_transform_t::Transpose; -}; - -/*! - * \brief Warp-synchronous matrix multiply-accumulate abstraction - * - * wmma_accumulator maps the CUDA WMMA API onto the GEMM structure - */ -template < - int WarpItemsY, /// Number of rows of the warp's accumulator tile - int WarpItemsX, /// Number of columns of the warp's accumulator tile - int WmmaItemsY, /// Number of rows in a single WMMA operation - int WmmaItemsX, /// Number of columns in a single WMMA operation - int WmmaItemsK, /// Inner dimension of WMMA operation - typename value_a_t, /// Type of A operand - typename value_b_t, /// Type of B operand - typename accum_t, /// Type of source and destination accumulators - matrix_transform_t::kind_t TransformA, /// Layout of A operand - matrix_transform_t::kind_t TransformB /// Layout of B operand -> -struct wmma_accumulator -{ -public: - - //------------------------------------------------------------------------- - // Constants and types - //------------------------------------------------------------------------- - - enum - { - /// Number 
of WMMA blocks in warp row - WmmaBlocksX = divide_assert::value, - - /// Number of WMMA blocks in a warp column - WmmaBlocksY = divide_assert::value, - }; - - /// Fragment type for matrix operand A - typedef nvcuda::wmma::fragment< - nvcuda::wmma::matrix_a, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_a_t, - typename matrix_layout::tag> - fragment_a_t; - - /// Fragment type for matrix operand B - typedef nvcuda::wmma::fragment< - nvcuda::wmma::matrix_b, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - value_b_t, - typename matrix_layout::tag> - fragment_b_t; - - /// Fragment type for accumulator - typedef nvcuda::wmma::fragment< - nvcuda::wmma::accumulator, - WmmaItemsY, - WmmaItemsX, - WmmaItemsK, - accum_t> - accumulator_t; - - /// Scratch storage layout - struct scratch_storage_t - { - /// Initialization vector - uint4 zero_slab; - }; - -public: - //------------------------------------------------------------------------- - // Data members - //------------------------------------------------------------------------- - - /// Thread's tile of accumulators - accumulator_t accumulators[WmmaBlocksX][WmmaBlocksY]; - -public: - - //------------------------------------------------------------------------- - // Constructor API - //------------------------------------------------------------------------- - - /// Constructor initializes accumulators to zero - inline __device__ - wmma_accumulator() - { - init(); - } - - - //------------------------------------------------------------------------- - // Accumulator API - //------------------------------------------------------------------------- - - /** - * \brief Zero-initialize thread accumulators. 
- */ - inline __device__ - void init() - { - #pragma unroll - for (int x = 0; x < WmmaBlocksX; ++x) - { - #pragma unroll - for (int y = 0; y < WmmaBlocksY; ++y) - { - nvcuda::wmma::fill_fragment(accumulators[x][y], accum_t(0)); - } - } - } - - /** - * \brief Compute the product of tile_a and tile_b and add the result to - * the tile of accumulators. - */ - inline __device__ - void multiply_accumulate( - fragment_a_t (&tile_a)[WmmaBlocksY], - fragment_b_t (&tile_b)[WmmaBlocksX]) - { - #pragma unroll - for (int x = 0; x < WmmaBlocksX; ++x) - { - #pragma unroll - for (int y = 0; y < WmmaBlocksY; ++y) - { - nvcuda::wmma::mma_sync(accumulators[x][y], tile_a[y], tile_b[x], accumulators[x][y]); - } - } - } -}; - - -} // namespace gemm -} // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_epilogue_traits.h b/cutlass/gemm/wmma_gemm_epilogue_traits.h new file mode 100644 index 0000000000..0fafacf90e --- /dev/null +++ b/cutlass/gemm/wmma_gemm_epilogue_traits.h @@ -0,0 +1,161 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of WMMA GEMM's epilogue phase. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmEpilogueTraitsHelper { + /// The scalar. + typedef typename EpilogueFunctor_::Scalar Scalar; + /// The output tile. + typedef typename GemmConfig_::OutputTile OutputTile; + + /// The number of WMMAs in the H dimension. + static int const kWmmasPerH = + GemmConfig_::AccumulatorsPerWarp::kH / GemmConfig_::InstructionShape::kH; + /// The number of iterations in the epilogue. That's the number of "horizontal" WMMAs. + typedef Shape<1, 1, kWmmasPerH> Iterations; + // The iteration strides in the H/W dimension. + typedef Shape<0, 0, 0> Delta; + /// The functor to do the math in the epilogue. + typedef EpilogueFunctor_ Functor; + + /// The traits class to build the iterator to store to shared memory for D.
+ typedef WmmaGemmSharedStoreTileDTraits< + // The output layout. + MatrixLayout::kColumnMajor, + // The pointer is float. + typename Functor::Scalar, + // The output tile size. + typename GemmConfig_::OutputTile, + // The number of warps. + typename GemmConfig_::Warps, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedStoreTileTraits; + + typedef WmmaMatrix + WmmaMatrix; + + /// The iterator to store D to shared memory. + typedef TileStoreIterator + SharedStoreIteratorD; + + /// The shared store transformer for D. + typedef Copy SharedStoreTransformerD; + + /// The traits class to build the iterator to load from shared memory for D. + typedef WmmaGemmSharedLoadTileDTraits< + // The pointer. + typename Functor::Scalar, + // The tile size. + typename SharedStoreIteratorD::Tile, + // The number of threads. + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDS. + GemmConfig_::kScalarsPerLdsD> + SharedLoadTileTraits; + + /// The iterator to load D from shared memory. + typedef TileLoadIterator + SharedLoadIteratorD; + + /// The traits class to build the iterator to load data from global memory for C^N. + typedef WmmaGemmGlobalIteratorCdTraits< + // The pointer is float const. + typename GemmConfig_::ScalarC const, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgC> + GlobalLoadTileTraits; + + /// The iterator to load C. + typedef WmmaGemmGlobalIteratorCd GlobalLoadIteratorC; + /// The transformer for C. + typedef Copy GlobalTransformerC; + + /// The traits class to build the iterator to store data to global memory for D^N. 
+ typedef WmmaGemmGlobalIteratorCdTraits< + // The scalar type of D. + typename GemmConfig_::ScalarD, + // The tile has size (N / Iterations)xM in GEMM's terminology. + Shape<1, + GemmConfig_::OutputTile::kH / ShapeCount::kCount, + GemmConfig_::OutputTile::kW>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, ShapeCount::kCount, GemmConfig_::kWarpSize>, + // The number of scalars per STG (STG.32 or STG.128, etc). + GemmConfig_::kScalarsPerStgD> + GlobalStoreTileTraits; + + /// The iterator to store D. + typedef WmmaGemmGlobalIteratorCd GlobalStoreIteratorD; + /// The transformer for D. + typedef Copy GlobalTransformerD; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_global_tile.h b/cutlass/gemm/wmma_gemm_global_tile.h new file mode 100644 index 0000000000..32d9759a90 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_global_tile.h @@ -0,0 +1,203 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines tile iterator traits for loading thread block-level tile from global memory. +*/ +#pragma once + +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmGlobalIteratorCdTraits : public GemmGlobalTileTraits { + /// The base class. + typedef GemmGlobalTileTraits + Base; + + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> Delta; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int thread_offset_h = threadIdx.x / Base::Threads::kW; + int thread_offset_w = threadIdx.x % Base::Threads::kW * Base::ThreadsDelta::kW; + + return make_Coord(0, thread_offset_h, thread_offset_w, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmGlobalIteratorCd : public TileIteratorBase { + /// This class.
+ typedef WmmaGemmGlobalIteratorCd This_; + /// The traits. + typedef TileTraits_ Traits; + /// The base class. + typedef TileIteratorBase + Base; + /// Override the strides in each dimension between different loads/stores. + typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> ImmediateOffsetStrides; + /// The layout. + static MatrixLayout::Kind const kLayout = TileTraits_::kLayout; + + /// The scalar. + typedef typename TileTraits_::Scalar Scalar; + /// The pointer. + typedef typename TileTraits_::Pointer Pointer; + /// The threads. + typedef typename TileTraits_::Threads Threads; + /// The index. + typedef Index_ Index; + /// The thread offset functor. + typedef typename TileTraits_::ThreadOffset ThreadOffset; + + /// The params. + struct Params { + /// The pointer. + Pointer pointer; + /// The stride in the H dimension to setup the thread in the block. + Index stride_h; + /// The strides to increment the pointer. + Index inc_h, inc_advance; + /// The column offset to compute the predicate for the columns. + Index predicate_offset; + /// The strides to increment the predicate offset. + Index predicate_inc_h, predicate_inc_advance; + + /// Setup the params. + CUTLASS_HOST_DEVICE int initialize( + Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w) { + // The pointer. + this->pointer = pointer; + // Setup the base stride. One "group of threads" per column. + stride_h = ld; + // Each thread outputs 1 column per iteration. + inc_h = ld * TileTraits_::Threads::kH; + inc_advance = inc_h + epilogue_stride_w; + + predicate_offset = n; + predicate_inc_h = TileTraits_::Threads::kH; + predicate_inc_advance = predicate_inc_h + epilogue_delta_w; + + // It worked. + return 0; + } + }; + + Params params; + + Coord<4> thread_offset; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmGlobalIteratorCd() {} + + /// Ctor.
+ CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const& params, + const Coord<3>& bounds, + const Coord<3>& block, + int const pointer_offset = 0, + int const pred_offset = 0, + ThreadOffset thread_offset_func = ThreadOffset()) + + : params(params) { + thread_offset = thread_offset_func(); + // Each warp works on a different column of the tile. + int const h = thread_offset[1] + block[1]; + // Each lane writes a different element. + int const w = thread_offset[2] + block[2]; + // Setup the pointer. + this->params.pointer += ((h * params.stride_h + w) + pointer_offset); + + // Prepare the vector of predicates. + for (int i = 0; i < Base::Iterations::kW; ++i) { + predicates.set(i, w + i * Base::Delta::kW < bounds[2]); + } + this->params.predicate_offset -= (h + pred_offset); + } + + /// Increment the pointer in the C dimension. + CUTLASS_DEVICE void inc_c() {} + /// Increment the pointer in the W dimension. + CUTLASS_DEVICE void inc_w() {} + /// Increment the pointer in the H dimension. + CUTLASS_DEVICE void inc_h() { + params.pointer += params.inc_h; + params.predicate_offset -= params.predicate_inc_h; + } + /// Increment the pointer in the D dimension. + CUTLASS_DEVICE void inc_d() {} + /// Increment the pointer to move to the next iteration. + CUTLASS_DEVICE void inc_advance() { + params.pointer += params.inc_advance; + params.predicate_offset -= params.predicate_inc_advance; + } + + /// Test the predicate. + CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { + return predicates.at(w) && params.predicate_offset > 0; + } + + /// Returns the raw pointer + CUTLASS_HOST_DEVICE + Pointer data() { return params.pointer; } + + CUTLASS_HOST_DEVICE + Pointer const data() const { return params.pointer; } + + /// The predicates for the row. 
+ cutlass::PredicateVector predicates; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass diff --git a/cutlass/gemm/wmma_gemm_multiply_add.h b/cutlass/gemm/wmma_gemm_multiply_add.h new file mode 100644 index 0000000000..5968350e05 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_multiply_add.h @@ -0,0 +1,108 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmMultiplyAdd { + /// The shape of the instruction. + typedef InstructionShape_ InstructionShape; + /// The number of threads per warp. That's a dummy configuration. + typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp; + /// The dimensions. + typedef AccumulatorsPerWarp_ AccumulatorsPerWarp; + /// The type for A. + typedef ScalarA_ ScalarA; + /// The type for B. + typedef ScalarB_ ScalarB; + /// The type for C and D. + typedef ScalarC_ ScalarC; + /// The number of iterations. + typedef typename ShapeDiv::Shape Iterations; + + /// The element for A. + typedef WmmaMatrix ElementA; + /// The fragment for A. + typedef Fragment FragmentA; + + /// The element for B. + typedef WmmaMatrix ElementB; + /// The fragment for B. + typedef Fragment FragmentB; + + /// The element for C. + typedef WmmaMatrix ElementC; + /// The fragment for C. + typedef Fragment Accumulators; + + /// Ctor. + CUTLASS_DEVICE WmmaGemmMultiplyAdd() {} + + /// Multiply-add : d = a*b+c, applied to each of the Iterations::kH x Iterations::kW accumulator elements.
+ CUTLASS_DEVICE void multiply_add(FragmentA const& a, + FragmentB const& b, + Accumulators const& c, + Accumulators& d) { + for (int j = 0; j < Iterations::kH; ++j) { + for (int i = 0; i < Iterations::kW; ++i) { + // The input elements: a is indexed by i, b by j, c by the flattened (j, i) position. + ElementA const& elt_a = a[i]; + ElementB const& elt_b = b[j]; + ElementC const& elt_c = c[j * Iterations::kW + i]; + + // The output element, at the same flattened (j, i) position as elt_c. + ElementC& elt_d = d[j * Iterations::kW + i]; + + // The wmma instruction: elt_d receives elt_a times elt_b accumulated onto elt_c. + nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c); + } + } + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_shared_tile.h b/cutlass/gemm/wmma_gemm_shared_tile.h new file mode 100644 index 0000000000..7d15b260fa --- /dev/null +++ b/cutlass/gemm/wmma_gemm_shared_tile.h @@ -0,0 +1,240 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines iterator traits for efficiently loading and storing fragment to and from shared + memory, specialized for WMMA GEMM. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include + +namespace cutlass { +namespace gemm { + +template +struct Debug {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileATraits { + /// The operand. + static GemmOperand::Kind const kOperand = GemmOperand::kA; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = 1; + /// The tile with skew. + typedef Tile_ Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The warps strides. + static int const kWarpStride = kWarpStride_; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The strides between iterations. + typedef Delta_ Delta; + /// The strides between iterations. 
+ typedef Delta_ ImmediateOffsetStrides; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The offset. + int const offset = warp % Warps::kW * kWarpStride; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileBTraits { + /// The operand. + static GemmOperand::Kind const kOperand = GemmOperand::kB; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = 1; + /// The tile with skew. + typedef Tile_ Tile; + /// The number of warps. + typedef Warps_ Warps; + /// The warps strides. + static int const kWarpStride = kWarpStride_; + /// The number of iterations. + typedef Iterations_ Iterations; + /// The strides between iterations. + typedef Delta_ Delta; + /// The strides between iterations. + typedef Delta_ ImmediateOffsetStrides; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The offset. + int const offset = warp / Warps::kW * kWarpStride; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedStoreTileDTraits { + /// The operand. 
+ static GemmOperand::Kind const kOperand = GemmOperand::kC; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + /// The scalar. + typedef Scalar_ Scalar; + // The access size + static int const kAccessSize = 1; + /// The pointer. + typedef Scalar* Pointer; + /// The number of warps. + typedef Warps_ Warps; + /// The shape of the WMMA instruction. + typedef WmmaShape_ WmmaShape; + /// The skew. + static int const kSkew = kSkew_; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + /// The tile with skew. + typedef Shape<1, Warps_::kH * WmmaShape_::kH, OutputTile_::kW + kSkew_> Tile; + /// The number of iterations needed to store the tile. + typedef Shape<1, 1, OutputTile_::kW / Warps::kW / WmmaShape_::kW> Iterations; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> ImmediateOffsetStrides; + + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The warp id. + int const warp = threadIdx.x / kWarpSize; + // The starting column. + int const h = warp / Warps::kW * WmmaShape::kH; + // The w. + int const w = warp % Warps::kW * WmmaShape::kW; + // The offset. + int const offset = h * Tile::kW + w; + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmSharedLoadTileDTraits { + /// The scalar. + typedef Scalar_ Scalar; + /// The pointer. + typedef Scalar const* Pointer; + /// The access size + static int const kAccessSize = kScalarsPerLds_; + /// The tile. + typedef typename ReshapeTile::Tile Tile; + /// The threads. + typedef typename ReshapeThreads::Threads Threads; + /// The threads strides. 
+ typedef Shape<1, Tile::kW * Tile::kC, Tile::kC> ThreadsStrides; + /// The memory space. + static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared; + + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kScalarsPerLds_> Delta; + /// The strides in each dimension between different loads/stores. + typedef Shape<0, Threads::kH * ShapeCount::kWc, Threads::kW * kScalarsPerLds_> + ImmediateOffsetStrides; + /// The number of iterations needed to load/store the tile. + typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kScalarsPerLds_> + Iterations; + + /// ThreadOffset + struct ThreadOffset { + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + // The offset. + int const offset = ComputeThreadOffsetFromStrides::get(); + return make_Coord(0, 0, offset, 0); + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/gemm/wmma_gemm_traits.h b/cutlass/gemm/wmma_gemm_traits.h new file mode 100644 index 0000000000..7901201630 --- /dev/null +++ b/cutlass/gemm/wmma_gemm_traits.h @@ -0,0 +1,574 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines structural properties of GEMM targeting WMMA API in CUDA. +*/ +#pragma once + +#include +#ifdef CUTLASS_USE_WMMA_API + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cutlass { +namespace gemm { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The tile size for the GEMM KxNxM. + typename OutputTile_, + /// The output type. + typename ScalarC_, + /// The accumulator type. + typename Accumulator_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_, + /// The shape of the WMMA instruction. + typename InstructionShape_, + /// The number of scalars per LDG for A.
+ int kScalarsPerLdgA_, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_> +struct WmmaGemmConfig : public GemmConfig< + /// The scalar type for A. + half, + /// The scalar type for B. + half, + /// The scalar type for C. + ScalarC_, + /// The scalar type for D. + ScalarC_, + /// The tile size for the GEMM KxNxM. + OutputTile_, + /// The functor to do the math in the main loop. + WmmaGemmMultiplyAdd, + /// The number of scalars per LDG for A. + kScalarsPerLdgA_, + /// The number of scalars per STS for A. + kScalarsPerLdgA_, + /// The number of scalars per LDS for A. + 8, + /// The number of scalars per LDG for B. + kScalarsPerLdgB_, + /// The number of scalars per STS for B. + kScalarsPerLdgB_, + /// The number of scalars per LDS for B. + 8, + /// The number of scalars per LDG for C and STG for D. + 16 / sizeof(ScalarC_), + /// The number of scalars per STS for D. + 16 / sizeof(ScalarC_), + /// The number of scalars per LDS for D. + 16 / sizeof(ScalarC_), + /// The number of stages in shared memory. + 1> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA + : public GemmTileTraitsHelperA { + /// The base config. + typedef GemmTileTraitsHelperA Base; + + /// The skew. + static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar); + /// The shared tile size. + typedef Shape + Tile; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + typename Base::MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). 
+ typename Base::GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The number of scalars loaded per iteration. + static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + typename Base::MultiplyAddScalar, + // The output tile size. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperA { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarA Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for A^T. + typedef GemmGlobalTileTraits< + // That's A. + GemmOperand::kA, + // A is row-major. + MatrixLayout::kRowMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. 
+ Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgA> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for A^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsA> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW; + /// The traits class to build the iterator to load from shared memory for A. + typedef WmmaGemmSharedLoadTileATraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kW * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. 
+ typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB + : public GemmTileTraitsHelperB { + /// The base config. + typedef GemmTileTraitsHelperB Base; + + /// The skew. + static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar); + /// The shared tile size. + typedef Shape + Tile; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to store data to shared memory for B^T. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + typename Base::MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename Base::GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG. + static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The number of scalars loaded per iteration. + static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kRowMajor, + // The pointer. + typename Base::MultiplyAddScalar, + // The output tile size. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. 
+ Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct WmmaGemmTileTraitsHelperB { + /// The layout. + static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor; + + /// The input scalar. + typedef typename GemmConfig_::ScalarB Scalar; + /// The scalar stored in shared memory. + typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar; + + /// WMMA matrix + typedef WmmaMatrix + WmmaMatrix; + + /// The traits class to build the iterator to load data from global memory for B^N. + typedef GemmGlobalTileTraits< + // That's B. + GemmOperand::kB, + // B is column-major. + MatrixLayout::kColumnMajor, + // The pointer is float const. + Scalar const, + // The tile has size KxM in GEMM's terminology. + Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, + // The threads are distributed as warps x 32 (the traits may reorganize). + Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, + // The number of scalars per LDG (LDG.32 or LDG.128, etc). + GemmConfig_::kScalarsPerLdgB> + GlobalTileTraits; + + /// The skew. + static int const kSkew = 16 / sizeof(MultiplyAddScalar); + /// The tile. + typedef Shape + Tile; + + /// The traits class to build the iterator to store data to shared memory for B^N. + typedef GemmSharedStoreTileAbTraits< + // The pointer. + MultiplyAddScalar, + // The tile has size KxM in GEMM's terminology. + Tile, + // The threads are distributed as warps x 32 (the traits may reorganize). + typename GlobalTileTraits::Threads, + // The number of scalars per STS (STS.32 or STS.128, etc). + GemmConfig_::kScalarsPerStsB> + SharedStoreTileTraits; + + /// The number of elements loaded in one LDG.
+ static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH; + /// The traits class to build the iterator to load from shared memory for B. + typedef WmmaGemmSharedLoadTileBTraits< + // The layout of the matrix. + MatrixLayout::kColumnMajor, + // The pointer. + MultiplyAddScalar, + // The tile in shared memory. + Tile, + // The number of warps. + typename GemmConfig_::Warps, + // The strides between warps. + GemmConfig_::InstructionShape::kH * Tile::kW, + // The number of iterations to load the data. + Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>, + // The stride between iterations. + Shape, + // The shape of the instruction. + typename GemmConfig_::InstructionShape> + SharedLoadTileTraits; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The output tile. + typename OutputTile_, + /// The output type. + typename ScalarC_, + /// The accumulator type. + typename Accumulator_, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_, + /// The shape of the WMMA instruction. + typename InstructionShape_, + /// The number of halfs loaded in one LDG for A. + int kScalarsPerLdgA_, + /// The number of halfs loaded in one LDG for B. + int kScalarsPerLdgB_, + /// The index. + typename Index_> +struct WmmaGemmTraitsHelper { + /// The WMMA GEMM config. + typedef WmmaGemmConfig + GemmConfig; + + /// The GEMM config for A. + typedef WmmaGemmTileTraitsHelperA GemmTileTraitsHelperA; + /// The GEMM config for B. + typedef WmmaGemmTileTraitsHelperB GemmTileTraitsHelperB; + + /// The iterator to load A from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorA; + /// The default transformer for A. 
+ typedef Copy GlobalTransformerA; + /// The iterator to store A to shared memory. + typedef TileStoreIterator + SharedStoreIteratorA; + /// The stream to load A from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamA; + + /// The iterator to load B from global memory. + typedef GemmGlobalIteratorAb + GlobalLoadIteratorB; + // The default transformer for B. + typedef Copy GlobalTransformerB; + /// The iterator to store B to shared memory. + typedef TileStoreIterator + SharedStoreIteratorB; + /// The stream to load B from global memory to shared memory. + typedef GlobalLoadStream + GlobalLoadStreamB; + + /// The iterator to load A from shared memory. + typedef TileLoadIterator + SharedLoadIteratorA; + /// The stream to load A from shared memory. + typedef SharedLoadStream SharedLoadStreamA; + /// The iterator to load B from shared memory. + typedef TileLoadIterator + SharedLoadIteratorB; + /// The stream to load B from shared memory. + typedef SharedLoadStream SharedLoadStreamB; + + /// The functor to do the multiply-add in the main loop. + typedef typename GemmConfig::MultiplyAdd MultiplyAdd; + /// The object to clear accumulators. + typedef ClearAccumulators ClearAccumulators; + + /// The helper to create the epilogue traits. + typedef WmmaGemmEpilogueTraitsHelper EpilogueTraitsHelper; + /// The traits class for the epilogue. + typedef SimplifiedGemmEpilogueTraits + GemmEpilogueTraits; + /// The epilogue. + typedef GemmEpilogue Epilogue; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template > +struct WmmaGemmAccumulatorsPerWarp { + typedef typename ShapeMin::Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template < + /// The layout for A. + MatrixLayout::Kind kLayoutA_, + /// The layout for B. + MatrixLayout::Kind kLayoutB_, + /// The tile size for the GEMM KxNxM. 
+ typename OutputTile_ = Shape<64, 128, 128>, + /// The output type. + typename ScalarC_ = float, + /// The functor to do the math in the epilogue. + typename EpilogueFunctor_ = LinearScaling, + /// The accumulator type. + typename Accumulator_ = ScalarC_, + /// The number of accumulators per warp. + typename AccumulatorsPerWarp_ = typename WmmaGemmAccumulatorsPerWarp::Shape, + /// The shape of the WMMA instruction. + typename InstructionShape_ = Shape<16, 16, 16>, + /// The number of scalars per LDG for A. + int kScalarsPerLdgA_ = 8, + /// The number of scalars per LDG for B. + int kScalarsPerLdgB_ = 8, + /// The index. + typename Index_ = int, + /// The helper class. + typename Helper_ = WmmaGemmTraitsHelper > +struct WmmaGemmTraits : public GemmTraits< + // The config. + typename Helper_::GemmConfig, + // The stream to load A from global memory to shared memory. + typename Helper_::GlobalLoadStreamA, + // The stream to load B from global memory to shared memory. + typename Helper_::GlobalLoadStreamB, + // The stream to load A from shared memory. + typename Helper_::SharedLoadStreamA, + // The stream to load B from shared memory. + typename Helper_::SharedLoadStreamB, + // The epilogue. + typename Helper_::Epilogue, + // The block swizzle to reorganize the grid. + IdentityBlockSwizzle, + // The index. + Index_, + // The tool used to clear accumulators. + typename Helper_::ClearAccumulators> {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace gemm +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass/iterator_access.h b/cutlass/iterator_access.h new file mode 100644 index 0000000000..db87e0d137 --- /dev/null +++ b/cutlass/iterator_access.h @@ -0,0 +1,325 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Free functions for loading and storing to implementations of tile iterator concepts.
+*/ +#pragma once + +#include +#include +#include +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Loads a fragment from an input iterator, skipping every access for which +/// iterator.valid(d, h, w, c) is false. The iterator is stepped with inc_w()/inc_h()/inc_d() +/// between iterations and inc_advance() is called once at the end, so the caller's iterator is +/// modified. +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < InputIterator::Iterations::kD; ++d) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + if (iterator.valid(d, h, w, c)) { + int const offset = + ComputeOffsetFromStrides::get( + 0, 0, w, c); + Load:: + load(reinterpret_cast( + frag_iterator.at(d, h, w, c)), + iterator.data(), + offset); + } + } + if (w < InputIterator::Iterations::kW - 1) { + iterator.inc_w(); + } + } + if (h < InputIterator::Iterations::kH - 1) { + iterator.inc_h(); + } + } + if (d < InputIterator::Iterations::kD - 1) { + iterator.inc_d(); + } + } + iterator.inc_advance(); +} + +/// Loads a fragment from a shared memory input iterator. Every (d, h, w, c) access is read from +/// iterator.data() at a computed offset; no increment method of the iterator is called. +template +CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < InputIterator::Iterations::kD; ++d) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentLoad::load(frag_iterator.at(d, h, w, c), + iterator.data(), + offset); + } + } + } + } +} + +/// Loads the accesses at depth index d of a shared memory input iterator and writes them to +/// depth index 0 of the fragment. No increment method of the iterator is called. +template +CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) { + typename InputIterator::FragmentIterator frag_iterator(fragment); + for (int h = 0; h < 
InputIterator::Iterations::kH; ++h) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w) { + for (int c = 0; c < InputIterator::Iterations::kC; ++c) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentLoad::load(frag_iterator.at(0, h, w, c), + iterator.data(), + offset); + } + } + } +} + +/// Loads a fragment from an input iterator, masked by a predicate adapter: an access is performed +/// only when predicate_adapter.at(d, h, w, 0) is true. The iterator is incremented after every +/// w, h and d iteration (including the last one), so it is left advanced on return. +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + typename InputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) { + for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) { + for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) { + if (predicate_adapter.at(d, h, w, 0)) { + int idx = InputIterator::Tile::kC * + (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d)); + + Load:: + load(reinterpret_cast(fragment[idx]), + iterator.data(), + offset); + } + } + } + } +} + +/// Loads a fragment from an input iterator with every access enabled (uses a +/// TrivialPredicateTileAdapter); the iterator is advanced. +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + typename InputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_load_post_increment(iterator, fragment, offset, pred); +} + +/// Loads a fragment from an input iterator at offset 0, masked by pred_it; the iterator is +/// advanced. +template +CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, + Fragment &fragment, + ConstPredicateAdapter pred_it) { + iterator_load_post_increment(iterator, fragment, 0, pred_it); +} + +/// Loads a fragment, masked by a predicate adapter, through a local copy of the iterator; the +/// caller's iterator is left unchanged. +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator, + Fragment &fragment, + typename InputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + InputIterator iterator(_iterator); + iterator_load_post_increment(iterator, fragment, offset, predicate_adapter); +} + +/// Loads a 
fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, + Fragment &fragment, + typename InputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_load(iterator, fragment, offset, pred); +} + +/// Loads a fragment from an input iterator +template +CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator, + Fragment &fragment, + ConstPredicateAdapter pred_it) { + iterator_load(iterator, fragment, 0, pred_it); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) { + typename OutputIterator::FragmentIterator frag_iterator(fragment); + for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { + if (iterator.valid(d, h, w, 0)) { + int const offset = + ComputeOffsetFromStrides::get( + d, h, w, 0); + + Store:: + store(reinterpret_cast( + frag_iterator.at(d, h, w, 0)), + iterator.data(), + offset); + } + if (w < OutputIterator::Iterations::kW - 1) { + iterator.inc_w(); + } + } + if (h < OutputIterator::Iterations::kH - 1) { + iterator.inc_h(); + } + } + if (d < OutputIterator::Iterations::kD - 1) { + iterator.inc_d(); + } + } + iterator.inc_advance(); +} + +/// Stores a fragment to a shared memory output iterator +template +CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) { + typename OutputIterator::FragmentConstIterator frag_iterator(fragment); + for (int d = 0; d < OutputIterator::Iterations::kD; ++d) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w) { + for (int c = 0; c < OutputIterator::Iterations::kC; ++c) { + int const offset = + 
ComputeOffsetFromStrides::get( + d, h, w, c); + + FragmentStore::store(frag_iterator.at(d, h, w, c), + iterator.data(), + offset); + } + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Stores a fragment to an output iterator, masked by a predicate iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) { + for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) { + for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) { + if (predicate_adapter.at(d, h, w, 0)) { + int idx = OutputIterator::Tile::kC * + (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d)); + + Store:: + store(reinterpret_cast(fragment[idx]), + iterator.data(), + offset); + } + } + } + } +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_store_post_increment(iterator, fragment, offset, pred); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, + Fragment const &fragment, + ConstPredicateAdapter pred_it) { + iterator_store_post_increment(iterator, fragment, 0, pred_it); +} + +/// Stores a fragment to an output iterator, masked by a predicate iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator, + Fragment const &fragment, + typename OutputIterator::Index offset, + ConstPredicateAdapter predicate_adapter) { + OutputIterator iterator(_iterator); + iterator_store_post_increment(iterator, fragment, 
offset, predicate_adapter); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, + Fragment const &fragment, + typename OutputIterator::Index offset = 0) { + TrivialPredicateTileAdapter pred; + iterator_store(iterator, fragment, offset, pred); +} + +/// Stores a fragment to an output iterator +template +CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator, + Fragment const &fragment, + ConstPredicateAdapter pred_it) { + iterator_store(iterator, fragment, 0, pred_it); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/load_store.h b/cutlass/load_store.h new file mode 100644 index 0000000000..d3d0ce81e7 --- /dev/null +++ b/cutlass/load_store.h @@ -0,0 +1,199 @@ +/*************************************************************************************************** + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines abstractions for efficiently loading and storing vectors to memory. +*/ +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** +* @brief Enum to specify which memory space data resides in. +*/ +struct MemorySpace { + enum Kind { + kGeneric, // Data accessed through pointer dereferencing + kShared, // Data resides in shared memory + kGlobal // Data resides in global memory + }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template 1), + size_t = (sizeof(Scalar_) * Lanes_)> +struct Load { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function. + static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + dst = reinterpret_cast(&pointer[offset])[0]; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load { + /// The output type. 
+ typedef typename Vectorize::Type AccessType; + + /// The load function: fills dst.registers[0] with a single access at &pointer[offset]. + static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + dst.registers[0] = reinterpret_cast(&pointer[offset])[0]; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function: reads one uint2 at &pointer[offset] into dst.registers[0..1]. + static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + uint2 tmp = reinterpret_cast(&pointer[offset])[0]; + dst.registers[0] = tmp.x; + dst.registers[1] = tmp.y; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function: reads one double2 at &pointer[offset] into dst[0..1]. + static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) { + double2 tmp = reinterpret_cast(&pointer[offset])[0]; + dst[0] = tmp.x; + dst[1] = tmp.y; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Load { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The load function: reads one uint4 at &pointer[offset] into dst.registers[0..3]. + static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) { + uint4 tmp = reinterpret_cast(&pointer[offset])[0]; + dst.registers[0] = tmp.x; + dst.registers[1] = tmp.y; + dst.registers[2] = tmp.z; + dst.registers[3] = tmp.w; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template 1), + size_t = (sizeof(Scalar_) * Lanes_)> +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. 
+ static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + pointer[offset] = src; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. + static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + uint32_t* addr = reinterpret_cast(&pointer[offset]); + addr[0] = src.registers[0]; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. + static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + uint2* addr = reinterpret_cast(&pointer[offset]); + addr[0] = make_uint2(src.registers[0], src.registers[1]); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. + static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) { + double2* addr = reinterpret_cast(&pointer[offset]); + addr[0] = make_double2(src[0], src[1]); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Store { + /// The output type. + typedef typename Vectorize::Type AccessType; + + /// The store function. 
+ static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) { + uint4* addr = reinterpret_cast(&pointer[offset]); + addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/matrix_traits.h b/cutlass/matrix_traits.h new file mode 100644 index 0000000000..77e8b70625 --- /dev/null +++ b/cutlass/matrix_traits.h @@ -0,0 +1,48 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines properties of matrices used to denote layout and operands to GEMM kernels. +*/ +#pragma once + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Describes layouts of matrices +struct MatrixLayout { + enum Kind { kRowMajor, kColumnMajor }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Gemm operand - D = A * B + C +struct GemmOperand { + enum Kind { kA, kB, kC, kD }; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/predicate_vector.h b/cutlass/predicate_vector.h new file mode 100644 index 0000000000..81668577e2 --- /dev/null +++ b/cutlass/predicate_vector.h @@ -0,0 +1,493 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines container classes and iterators for managing a statically sized vector + of boolean predicates. +*/ +#pragma once + +#include + +#include +#include + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_vector_concept Predicate Vector Concept +@{ + +Implementations of \ref predicate_vector_concept contain an ordered set of boolean predicates which +may be used as conditionals in other device-side operations. Both random access and iterators +offering sequential access are provided. 
+ +@par Predicate Vector + A \ref predicate_vector_concept satisfies the following expressions + - at(int idx) - returns the value of the indexed predicate + - set(int idx, bool value) - sets the value of the indexed predicate + - begin() - returns a \ref predicate_iterator_concept pointing to the first predicate + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_iterator_concept Predicate Iterator Concept +@{ + +Implementations of \ref predicate_iterator_concept enable accessing and traversing elements of a +bit vector. + +@par Const Predicate Iterator + A const \ref predicate_iterator_concept satisfies the following expressions + - ++it increments the iterator to the next predicate + - *it returns the value of the currently pointed-to predicate + +@par Mutable Predicate Iterator + A \ref predicate_iterator_concept that is non-const also satisfies the following expressions + - it.set(bool value) sets the value of the currently pointed-to predicate + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup predicate_tile_adapter Predicate Tile Adapter Concept +@{ + +Implementations of \ref predicate_tile_adapter provide a mapping between the elements of a \ref +tile_traits_concept and a \ref predicate_vector_concept. + +@par Predicate Tile Adapter + A \ref predicate_tile_adapter satisfies the following expressions + - at(int d, int h, int w, int c) - returns the value of a predicate corresponding to the + access (d, h, w, c) within the tile. + +@} +*/ + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically sized array of bits implementing @concept{predicate_vector_concept}. 
+template <
+    /// Number of predicates contained in predicate vector
+    int kPredicates_,
+    /// Number of predicates contained in each byte of internal storage
+    int kPredicatesPerByte_ = 4,
+    /// Location of first predicate within byte of internal storage
+    int kPredicateStart_ = 0>
+struct PredicateVector {
+  /// Number of bits stored by the PredicateVector
+  static int const kPredicates = kPredicates_;
+
+  /// Number of bits stored within each byte of the predicate bit vector
+  static int const kPredicatesPerByte = kPredicatesPerByte_;
+
+  /// First bit within each byte containing predicates
+  static int const kPredicateStart = kPredicateStart_;
+
+  // Make sure no one tries to put more than 8 bits in a byte :)
+  static_assert(kPredicatesPerByte <= 8, "kPredicatesPerByte must fit within an actual byte");
+  // Make sure the "offsetted" bits fit in one byte. The last predicate occupies bit
+  // (kPredicateStart + kPredicatesPerByte - 1), so the sum may be equal to 8.
+  static_assert(kPredicateStart + kPredicatesPerByte <= 8,
+                "The offsetted predicates must fit within an actual byte.");
+
+  /// Storage type of individual elements
+  typedef uint32_t Storage;
+
+  /// Number of bytes needed
+  static int const kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte;
+
+  /// Number of storage elements needed
+  static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage);
+
+ private:
+  //
+  // Data members
+  //
+
+  /// Words of bit vector
+  Storage storageData[kWordCount];
+
+  //
+  // Methods
+  //
+
+  /// Computes the word and bit corresponding to a logical predicate index.
+  ///
+  /// Each byte of storage holds kPredicatesPerByte predicates, placed at consecutive bits
+  /// starting at kPredicateStart; `bit` is the position within the selected word.
+  CUTLASS_HOST_DEVICE void computeStorageOffset(int &word, int &bit, int idx) const {
+    CUTLASS_ASSERT(idx < kPredicates);
+
+    int byte = (idx / kPredicatesPerByte);
+    int bit_offset = (idx % kPredicatesPerByte);
+
+    word = byte / sizeof(Storage);
+    int byte_offset = (byte % sizeof(Storage));
+
+    bit = byte_offset * 8 + bit_offset + kPredicateStart;
+  }
+
+  /// Accesses a given word with optional assertions
+  CUTLASS_HOST_DEVICE Storage &storage(int word) {
+    CUTLASS_ASSERT(word < kWordCount);
+    return storageData[word];
+  }
+
+  /// Accesses a given word with optional assertions
+  CUTLASS_HOST_DEVICE Storage const &storage(int word) const {
+    CUTLASS_ASSERT(word < kWordCount);
+    return storageData[word];
+  }
+
+ public:
+  //
+  // Iterator
+  //
+
+  /**
+   * @brief A const iterator implementing \ref predicate_iterator_concept enabling sequential
+   * read-only access to predicates.
+   * @concept{predicate_iterator_concept}
+   */
+  class ConstIterator {
+    /// Reference to PredicateVector instance
+    PredicateVector const &vec_;
+
+    /// Index into PredicateVector
+    int bit_;
+
+   public:
+    /// Copy constructor
+    CUTLASS_HOST_DEVICE
+    ConstIterator(ConstIterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
+
+    /// Constructs a const iterator over a PredicateVector, starting at predicate _start
+    CUTLASS_HOST_DEVICE
+    ConstIterator(PredicateVector const &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
+
+    /// Pre-increment
+    CUTLASS_HOST_DEVICE
+    ConstIterator &operator++() {
+      ++bit_;
+      return *this;
+    }
+
+    /// Pre-decrement
+    CUTLASS_HOST_DEVICE
+    ConstIterator &operator--() {
+      --bit_;
+      return *this;
+    }
+
+    /// Post-increment: advances this iterator and returns its previous position
+    CUTLASS_HOST_DEVICE
+    ConstIterator operator++(int) {
+      ConstIterator ret(*this);
+      ++bit_;
+      return ret;
+    }
+
+    /// Post-decrement: steps this iterator back and returns its previous position
+    CUTLASS_HOST_DEVICE
+    ConstIterator operator--(int) {
+      ConstIterator ret(*this);
+      --bit_;
+      return ret;
+    }
+
+    /// Returns true if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator==(ConstIterator const &it) const { return bit_ == it.bit_; }
+
+    /// Returns false if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator!=(ConstIterator const &it) const { return bit_ != it.bit_; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return vec_[bit_]; }
+  };
+
+  /**
+   * @brief An iterator implementing \ref predicate_iterator_concept enabling sequential
+   * read and write access to predicates.
+   * @concept{predicate_iterator_concept}
+   */
+  class Iterator {
+    /// Reference to PredicateVector instance
+    PredicateVector &vec_;
+
+    /// Index into PredicateVector
+    int bit_;
+
+   public:
+    /// Copy constructor
+    CUTLASS_HOST_DEVICE
+    Iterator(Iterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
+
+    /// Constructs an iterator from a PredicateVector
+    CUTLASS_HOST_DEVICE
+    Iterator(PredicateVector &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
+
+    /// Pre-increment
+    CUTLASS_HOST_DEVICE
+    Iterator &operator++() {
+      ++bit_;
+      return *this;
+    }
+
+    /// Pre-decrement
+    CUTLASS_HOST_DEVICE
+    Iterator &operator--() {
+      --bit_;
+      return *this;
+    }
+
+    /// Post-increment: advances this iterator and returns its previous position
+    CUTLASS_HOST_DEVICE
+    Iterator operator++(int) {
+      Iterator ret(*this);
+      ++bit_;
+      return ret;
+    }
+
+    /// Post-decrement: steps this iterator back and returns its previous position
+    CUTLASS_HOST_DEVICE
+    Iterator operator--(int) {
+      Iterator ret(*this);
+      --bit_;
+      return ret;
+    }
+
+    /// Returns true if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator==(Iterator const &it) const { return bit_ == it.bit_; }
+
+    /// Returns false if iterators point to the same bit
+    CUTLASS_HOST_DEVICE
+    bool operator!=(Iterator const &it) const { return bit_ != it.bit_; }
+
+    /// Gets the bit at the pointed to location
+    CUTLASS_HOST_DEVICE
+    bool get() { return vec_[bit_]; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return vec_[bit_]; }
+
+    /// Sets the bit at the pointed to location
+    CUTLASS_HOST_DEVICE
+    void set(bool value = true) { vec_.set(bit_, value); }
+  };
+
+  /// Iterator that always returns true
+  struct TrivialIterator {
+    /// Constructor
+    CUTLASS_HOST_DEVICE
+    TrivialIterator() {}
+
+    /// Constructs from an Iterator; the argument is ignored
+    CUTLASS_HOST_DEVICE
+    TrivialIterator(Iterator const &it) {}
+
+    /// Constructs an iterator from a PredicateVector; the argument is ignored
+    CUTLASS_HOST_DEVICE
+    TrivialIterator(PredicateVector const &_vec) {}
+
+    /// Pre-increment
+    CUTLASS_HOST_DEVICE
+    TrivialIterator &operator++() { return *this; }
+
+    /// Post-increment
+    CUTLASS_HOST_DEVICE
+    TrivialIterator operator++(int) { return *this; }
+
+    /// Dereferences iterator
+    CUTLASS_HOST_DEVICE
+    bool operator*() const { return true; }
+  };
+
+ public:
+  //
+  // Methods
+  //
+
+  /// Initialize the predicate vector
+  CUTLASS_HOST_DEVICE PredicateVector(bool value = true) { fill(value); }
+
+  /// Fills all predicates with a given value. Whole storage words are written, so bits that do
+  /// not correspond to a predicate are set as well.
+  CUTLASS_HOST_DEVICE void fill(bool value = true) {
+    Storage item = (value ? ~Storage(0) : Storage(0));
+
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = item;
+    }
+  }
+
+  /// Accesses a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE bool operator[](int idx) const { return at(idx); }
+
+  /// Accesses a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE bool at(int idx) const {
+    int bit, word;
+    computeStorageOffset(word, bit, idx);
+
+    return ((storage(word) >> bit) & 1);
+  }
+
+  /// Set a bit within the predicate vector.
+  CUTLASS_HOST_DEVICE void set(int idx, bool value = true) {
+    int bit, word;
+    computeStorageOffset(word, bit, idx);
+
+    // Clear the target bit, then OR in the new value.
+    Storage disable_mask = (~(Storage(1) << bit));
+    Storage enable_mask = (Storage(value) << bit);
+
+    storage(word) = ((storage(word) & disable_mask) | enable_mask);
+  }
+
+  /// Computes the intersection of two identical predicate vectors.
+  CUTLASS_HOST_DEVICE PredicateVector &operator&=(PredicateVector const &predicates) {
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = (storage(i) & predicates.storage(i));
+    }
+    return *this;
+  }
+
+  /// Computes the union of two identical predicate vectors.
+  CUTLASS_HOST_DEVICE PredicateVector &operator|=(PredicateVector const &predicates) {
+    CUTLASS_PRAGMA_UNROLL
+    for (int i = 0; i < kWordCount; ++i) {
+      storage(i) = (storage(i) | predicates.storage(i));
+    }
+    return *this;
+  }
+
+  /// Returns true if entire predicate array is zero.
+  ///
+  /// Only the kPredicatesPerByte bits starting at kPredicateStart within each byte are examined;
+  /// the other bits of each byte (which fill(true) also sets) are masked out.
+  /// NOTE(review): predicate slots with index >= kPredicates inside the storage words are still
+  /// covered by the mask - confirm whether callers expect those to be ignored as well.
+  CUTLASS_HOST_DEVICE bool is_zero() const {
+    // The per-byte mask does not depend on the byte index, so build it once.
+    Storage const byte_mask = (((Storage(1) << kPredicatesPerByte) - 1) << kPredicateStart);
+
+    Storage mask(0);
+    for (int byte = 0; byte < int(sizeof(Storage)); ++byte) {
+      mask |= (byte_mask << (byte * 8));
+    }
+
+    Storage result(0);
+    for (int word = 0; word < kWordCount; ++word) {
+      result |= (storage(word) & mask);
+    }
+    return result == 0;
+  }
+
+  /// Returns an iterator to the start of the bit vector
+  CUTLASS_DEVICE
+  Iterator begin() { return Iterator(*this); }
+
+  /// Returns an iterator positioned one past the last predicate (index kPredicates)
+  CUTLASS_DEVICE
+  Iterator end() { return Iterator(*this, kPredicates); }
+
+  /// Returns a ConstIterator to the start of the bit vector
+  CUTLASS_DEVICE
+  ConstIterator const_begin() const { return ConstIterator(*this); }
+
+  /// Returns a ConstIterator positioned one past the last predicate (index kPredicates)
+  CUTLASS_DEVICE
+  ConstIterator const_end() const { return ConstIterator(*this, kPredicates); }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Always returns true predicate.
+struct TrivialPredicateTileAdapter {
+  /// Ctor.
+  CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter() {}
+
+  /// The value at location (d, h, w, c).
+  CUTLASS_HOST_DEVICE bool at(int, int, int, int) const { return true; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Adapter to enable random access to predicates via logical coordinate within a tile.
+template
+struct PredicateTileAdapter {
+  /// The vector of predicates.
+  typedef PredicateVector_ PredicateVector;
+  /// The iterations.
+  typedef Iterations_ Iterations;
+
+ private:
+  /// The predicates.
+  PredicateVector &predicates;
+
+ public:
+  /// Ctor.
+  CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_) : predicates(predicates_) {}
+
+  /// Get the value at location (d, h, w, c).
+ CUTLASS_DEVICE bool at(int d, int h, int w, int c) const { + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + return predicates.at(bit); + } + + /// Set the value at location (d, h, w, c). + CUTLASS_DEVICE void set(int d, int h, int w, int c, bool value) { + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + predicates.set(bit, value); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to enable random access to predicates via logical coordinate within a tile. +template +struct ConstPredicateTileAdapter { + /// The vector of predicates. + typedef PredicateVector_ PredicateVector; + /// The iterations. + typedef Iterations_ Iterations; + + private: + /// The predicates. + PredicateVector const &predicates; + + public: + /// Ctor. + CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_) + : predicates(predicates_) {} + + /// Get the value at location (d, h, w, c). + CUTLASS_DEVICE bool at(int d, int h, int w, int c) const { + int const bit = ComputeOffsetFromShape::get(d, h, w, c); + return predicates.at(bit); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/reshape_tile.h b/cutlass/reshape_tile.h new file mode 100644 index 0000000000..55aebfcafb --- /dev/null +++ b/cutlass/reshape_tile.h @@ -0,0 +1,58 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a type for restructuring a tile. +*/ +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// The following functor reshapes a tile of data. The goal is to have at least kAccessSize in +// the inner-most dimension. If the user respects that constraint, there is nothing to be done. If +// that's not the case, this functor will correct that and "extract" the right number of elements +// from the next dimension. + +template +struct ReshapeTile { + typedef Tile_ Tile; +}; + +template +struct ReshapeTile { + // Make sure the W dimension of the tile is large enough. 
+ static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small"); + // Make sure the dimension can be divided by the number of scalars. + static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported"); + // Collapse the W dimension. + typedef Shape Tile; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/shape.h b/cutlass/shape.h new file mode 100644 index 0000000000..f0f63d9c3a --- /dev/null +++ b/cutlass/shape.h @@ -0,0 +1,301 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines Shape implementing the Layout concept for representing a 4D hypercube of objects. +*/ +#pragma once + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/*!@defgroup layout_concept Layout Concept +* @{ +* @par Implementations of \ref layout_concept are used to describe a cube with DxHxW elements and C +scalars per element. + A HxW slice of a cube is called an image and a cube consists of D images. +* +* @par Notations +* Let Layout be an implementation of the \ref layout_concept. +* +* @par Valid Expressions +* - Layout::D specifies the depth of a cube +* - Layout::H specifies the height of a cube +* - Layout::W specifies the height of a cube +* - Layout::C specifies the number of channels of each element in a cube +* - Layout::W_c specifies the number of scalars of each row in one image of a cube. +* - Layout::H_w specifies the number of elements in an image slice. +* - Layout::H_w_c_specifies the number of scalars in an image slice. +* - Layout::D_h_w specifies the number of elements in a cube. +* - Layout::D_h_w_c specifies the number of scalars in a cube. +* - Layout::Strides is a \ref layout_concept specifying the strides. +* @} +*/ + +/** +* @brief A Shape implementing \ref layout_concept describing the dimensions of a cube. 
+* @concept{layout_concept} +*/ +template +struct Shape { + /// The depth of the cube. + static int const kD = kD_; + /// The height of the cube. + static int const kH = kH_; + /// The width of the cube. + static int const kW = kW_; + /// The number of scalars per element. + static int const kC = kC_; +}; + +/** +* @brief Compute derived counted of a \ref layout_concept based class +*/ +template +struct ShapeCount { + /// The number of elements per row. + static int const kWc = Shape::kW * Shape::kC; + /// The number of pixels per image. + static int const kHw = Shape::kH * Shape::kW; + /// The number of elements per image. + static int const kHwc = Shape::kH * kWc; + /// The number of pixels per cube. + static int const kDhw = Shape::kD * kHw; + /// The number of elements in the 4D space. + static int const kDhwc = Shape::kD * kHwc; + /// The number of elements in the 4D space. + static int const kCount = kDhwc; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeScale { + typedef Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeAdd { + typedef Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeSub { + typedef Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeMul { + typedef Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeDiv { + typedef Shape Shape; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ShapeMax { + typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD), + (A_::kH > B_::kH ? A_::kH : B_::kH), + (A_::kW > B_::kW ? 
A_::kW : B_::kW),
+                (A_::kC > B_::kC ? A_::kC : B_::kC)>
+      Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Shape whose extent in each of D, H, W and C is the smaller of the extents of A_ and B_.
+template
+struct ShapeMin {
+  typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD),
+                (A_::kH < B_::kH ? A_::kH : B_::kH),
+                (A_::kW < B_::kW ? A_::kW : B_::kW),
+                (A_::kC < B_::kC ? A_::kC : B_::kC)>
+      Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template
+struct ShapeStrides {
+  typedef Shape Shape;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube, with c varying fastest and d
+*        varying slowest.
+* @tparam Shape_ \ref layout_concept where each dimension of the cube specifies the corresponding
+*         extent (kH, kW and kC are used as multipliers; kD is not read).
+*/
+template
+struct ComputeOffsetFromShape {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    // clang-format off
+    return d * Shape_::kH * Shape_::kW * Shape_::kC +
+           h * Shape_::kW * Shape_::kC +
+           w * Shape_::kC +
+           c;
+    // clang-format on
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with a depth of 1.
+*        The d argument is ignored.
+* @tparam kSh Elements in the H dimension
+* @tparam kSw Elements in the W dimension
+* @tparam kSc Elements in the C dimension
+*/
+template
+struct ComputeOffsetFromShape > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return h * kSw_ * kSc_ + w * kSc_ + c;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1.
+*        The d and c arguments are ignored.
+* @tparam kSh Elements in the H dimension
+* @tparam kSw Elements in the W dimension
+*/
+template
+struct ComputeOffsetFromShape > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * kSw_ + w; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates as the dot product of (d, h, w, c) with
+*        the strides.
+* @tparam Strides_ \ref layout_concept where each dimension of the cube specifies the
+*         corresponding stride.
+*/
+template
+struct ComputeOffsetFromStrides {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with a depth of 1.
+*        The d argument is ignored.
+* @tparam S_h Stride in the H dimension in scalars
+* @tparam S_w Stride in the W dimension in scalars
+* @tparam S_c Stride between two scalars.
+*/
+template
+struct ComputeOffsetFromStrides > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
+    return h * S_h_ + w * S_w_ + c * S_c_;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Compute the offset for the given coordinates in a cube with one channel and a depth of 1.
+*        The d and c arguments are ignored.
+* @tparam S_h Stride in the H dimension in scalars
+* @tparam S_w Stride in the W dimension in scalars
+*/
+template
+struct ComputeOffsetFromStrides > {
+  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * S_h_ + w * S_w_; }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief Decompose threadIdx.x into coordinates of a cube whose dimensions are specified by
+*        Threads_ (c varies fastest, then w, h and d). Afterwards compute the offset of those
+*        coordinates using Strides_.
+* @tparam Threads_ The dimension of the cube the threadIdx.x value is mapped on
+* @tparam Strides_ The strides to use when computing the offsets based on the coordinates of the
+*         cube.
+*/
+template
+struct ComputeThreadOffsetFromStrides {
+  static CUTLASS_DEVICE int get() {
+    // Decompose the thread index.
+    int c = threadIdx.x % Threads_::kC;
+    int w = threadIdx.x / Threads_::kC % Threads_::kW;
+    int h = threadIdx.x / Threads_::kC / Threads_::kW % Threads_::kH;
+    int d = threadIdx.x / Threads_::kC / Threads_::kW / Threads_::kH;
+
+    // Compute the offset.
+    return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+*@brief Specialization for D=1: only c, w and h are decomposed from threadIdx.x
+*/
+template
+struct ComputeThreadOffsetFromStrides, Shape<1, S_h_, S_w_, S_c_> > {
+  static CUTLASS_DEVICE int get() {
+    // Decompose the thread index.
+    int c = threadIdx.x % T_c_;
+    int w = threadIdx.x / T_c_ % T_w_;
+    int h = threadIdx.x / T_c_ / T_w_ % T_h_;
+
+    // Compute the offset.
+    return h * S_h_ + w * S_w_ + c * S_c_;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+*@brief Specialization for D=1 and C=1: only w and h are decomposed from threadIdx.x
+*/
+template
+struct ComputeThreadOffsetFromStrides, Shape<1, S_h_, S_w_, 1> > {
+  static CUTLASS_DEVICE int get() {
+    // Decompose the thread index.
+    int w = threadIdx.x % T_w_;
+    // Unlike the D=1 specialization, h is not reduced modulo the thread-shape height here.
+    int h = threadIdx.x / T_w_;
+
+    // Compute the offset.
+    return h * S_h_ + w * S_w_;
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+}  // namespace cutlass
diff --git a/cutlass/tensor_ref.h b/cutlass/tensor_ref.h
new file mode 100644
index 0000000000..8ef31e3b8f
--- /dev/null
+++ b/cutlass/tensor_ref.h
@@ -0,0 +1,151 @@
+/***************************************************************************************************
+ * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a structure containing strides, bounds, and a pointer to tensor data. 
+*/ +#pragma once + +#include + +#include +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Structure modeling a pointer and stride into a tensor +template +class TensorRef { + public: + /// Data type of individual access + typedef Storage_ Storage; + + /// Rank of tensor + static int const Rank = Rank_; + + private: + // + // Data members + // + + /// Pointer to storage element + Storage* ptr_; + + /// Stride information + Coord stride_; + + public: + // + // Methods + // + + /// Default ctor + CUTLASS_HOST_DEVICE + TensorRef() : ptr_(nullptr) {} + + /// Constructs from a pointer, size, and stride + CUTLASS_HOST_DEVICE + TensorRef(Storage* ptr, Coord stride) : ptr_(ptr), stride_(stride) {} + + /// Updates the pointer, stride, and location within a TensorRef + CUTLASS_HOST_DEVICE + void reset(Storage* ptr = nullptr, Coord stride = Coord(0)) { + ptr_ = ptr; + stride_ = stride; + } + + /// Conversion function + template + TensorRef convert() { + Coord converted_stride; + for (int i = 0; i < Rank - 1; ++i) { + converted_stride[i] = stride_[i] * Extent::kValue / Extent::kValue; + } + converted_stride[Rank - 1] = stride_[Rank - 1]; + + return TensorRef(reinterpret_cast(ptr_), converted_stride); + } + + /// Returns true if the TensorRef may be safely accessed + CUTLASS_HOST_DEVICE + bool good() const { return ptr_ != nullptr; } + + /// Returns the pointer to referenced data + CUTLASS_HOST_DEVICE + Storage* data() const { return ptr_; } + + /// Returns the stride of the tensor + CUTLASS_HOST_DEVICE + Coord const& stride() const { return stride_; } + + /// Returns the stride of the tensor in the given dimension + CUTLASS_HOST_DEVICE + int const& stride(int dim) const { return stride_.at(dim); } + + /// Returns the maximum stride element as the 'leading dimension' + CUTLASS_HOST_DEVICE + int leading_dim() const { return __NV_STD_MAX(stride_[1], stride_[2]); } + + /// 
Computes the offset of an index from the origin of the tensor + CUTLASS_HOST_DEVICE + long long offset(Coord const& coord) const { + return stride_.template dot(coord); + } + + /// Returns a reference to the element at a given Coord + CUTLASS_HOST_DEVICE + Storage& at(Coord const& coord) const { return ptr_[offset(coord)]; } + + /// Element-wise accessor + Storage& operator[](Coord const& coord) const { return at(coord); } + + /// Returns a reference to the element at a given Coord + CUTLASS_HOST_DEVICE + Storage& at(int idx) const { return ptr_[idx]; } + + /// Element-wise accessor + Storage& operator[](int idx) const { return at(idx); } + + /// Adds an offset to the pointer + CUTLASS_HOST_DEVICE + TensorRef& advance(Coord const& b) { + ptr_ += offset(b); + return *this; + } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator+(Coord const& b) const { return TensorRef(ptr_ + offset(b), stride_); } + + /// Returns a TensorRef offset by a given amount + CUTLASS_HOST_DEVICE + TensorRef operator-(Coord const& b) const { return TensorRef(ptr_ - offset(b), stride_); } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tensor_view.h b/cutlass/tensor_view.h new file mode 100644 index 0000000000..89c6bd5716 --- /dev/null +++ b/cutlass/tensor_view.h @@ -0,0 +1,172 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines a structure containing strides and a pointer to tensor data. 
+*/ + +#pragma once + +#include + +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Host-side reference implementation of tensor operations +template +class TensorView : public TensorRef { + public: + /// Reference and stride + typedef TensorRef Base; + + /// Reference and stride + typedef Base TensorRef_t; + + /// Reference to constant type + typedef TensorRef ConstTensorRef_t; + + /// Rank of tensor + static int const Rank = TensorRef_t::Rank; + + /// Type used to compute the offset of an element to the base of a tensor + typedef int Offset_t; + + /// Coordinate into tensor + typedef Coord Coord_t; + + private: + // + // Data members + // + + /// Pointer to pitch-linear memory + TensorRef_t ref_; + + /// Dimensions of coordinate (independent of stride) + Coord_t size_; + + public: + // + // Device and Host Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + TensorView() {} + + /// Constructs a Tensor_view from a TensorRef and size + CUTLASS_HOST_DEVICE + TensorView(TensorRef_t const& _ref, Coord_t const& _size) : Base(_ref), size_(_size) {} + + /// Returns true if the Tensor_view is bound to some memory + CUTLASS_HOST_DEVICE + bool good() const { return ref().good(); } + + /// Returns a pointer to data + CUTLASS_HOST_DEVICE + T* data() const { return ref().data(); } + + /// Updates the reference and size of a Tensor_view object + CUTLASS_HOST_DEVICE + void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) { + Base::operator=(_ref); + size_ = _size; + } + + /// Accesses the tensor reference pointing to data + CUTLASS_HOST_DEVICE + TensorRef_t& ref() { return *this; } + + /// + CUTLASS_HOST_DEVICE + ConstTensorRef_t const_ref() { return ConstTensorRef_t(data(), stride()); } + + /// Accesses the tensor reference pointing to data + CUTLASS_HOST_DEVICE + TensorRef_t const& ref() const { return *this; } + + /// Accesses 
the size + CUTLASS_HOST_DEVICE + Coord_t const& size() const { return size_; } + + /// Accesses the size + CUTLASS_HOST_DEVICE + int size(int dim) const { return size_.at(dim); } + + /// Accesses the stride + CUTLASS_HOST_DEVICE + Coord_t const& stride() const { return ref().stride(); } + + /// Accesses the stride + CUTLASS_HOST_DEVICE + int const& stride(int dim) const { return ref().stride(dim); } + + /// Assigns the Tensor_view + CUTLASS_HOST_DEVICE + TensorView& operator=(TensorView const& _tensor) { + Base::operator=(_tensor._ref); + size_ = _tensor.size_; + return *this; + } + + /// Returns the index of an element + CUTLASS_HOST_DEVICE + Offset_t offset(Coord_t const& coord) const { return ref().offset(coord); } + + /// Determines whether a location is within a tensor + CUTLASS_HOST_DEVICE + bool contains(Coord_t const& coord) const { + for (int dim = 0; dim < Rank; ++dim) { + if (coord.at(dim) >= size_.at(dim)) { + return false; + } + } + return true; + } + + /// Element-wise accessor + CUTLASS_HOST_DEVICE + T& at(Coord_t const& coord) const { return ref().at(coord); } + + /// Element-wise accessor + T& operator[](Coord const& coord) const { return at(coord); } + + /// Element-wise accessor + CUTLASS_HOST_DEVICE + T& at(Offset_t idx) const { return ref().at(idx); } + + /// Returns a Tensor_view given location and size quantities + CUTLASS_HOST_DEVICE + TensorView subview(Coord_t const& location, Coord_t size) const { + return TensorView(ref() + location, size.clamp(size_ - location)); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/tile_iterator.h b/cutlass/tile_iterator.h new file mode 100644 index 0000000000..6543cebf98 --- /dev/null +++ b/cutlass/tile_iterator.h @@ -0,0 +1,881 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines the Tile Traits concept and iterators for loading and storing to tiles + efficiently. 
+*/
+#pragma once
+
+#include
+#include
+#include
+#include
+
+namespace cutlass {
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!@defgroup tile_traits_concept Tile Traits Concept
+@{
+
+\ref tile_traits_concept is a type defining the shape of a tile and the distribution of accesses
+by individual entities, either threads or other.
+
+@par Tile Traits Concept
+    Types satisfying \ref tile_traits_concept define the following members
+  - Tile - a type satisfying \ref layout_concept describing the dimensions of the tile
+  - Delta - a type satisfying \ref layout_concept describing the increments between accesses
+along each dimension
+  - Iterations - a type satisfying \ref layout_concept describing the number of accesses
+along each dimension
+  - ThreadOffset - the type of a functor computing the offset of each participating entity
+as a Coord<4>.
+@}
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// Specifies dimension in which post-increment accesses advance
+struct IteratorAdvance {
+  enum Kind { kD, kH, kW };
+};
+
+/// Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix
+struct IteratorFragment {
+  enum Kind { kScalar, kWmmaMatrix };
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief A template defining \ref tile_traits_concept by re-exporting its four template
+*        arguments as the member types Tile, Delta, Iterations and ThreadOffset
+* @concept{tile_traits_concept}
+*/
+template
+struct TileTraits {
+  /// Shape of the tile
+  typedef Tile_ Tile;
+
+  /// Number of steps between accesses along each dimension
+  typedef Delta_ Delta;
+
+  /// Number of accesses performed
+  typedef Iterations_ Iterations;
+
+  /// Functor that returns the logical coordinate of each entity's initial offset in the tile
+  typedef ThreadOffset_ ThreadOffset;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+///
Iterator for accessing a stripmined tile in memory
+///
+/// Besides Tile, Delta, Iterations and ThreadOffset, Traits_ must also provide
+/// ImmediateOffsetStrides, which is re-exported below.
+template >
+struct TileIteratorBase {
+  /// concept TileTraits
+  typedef Traits_ Traits;
+
+  /// Scalar element
+  typedef Scalar_ Scalar;
+
+  /// Fragment element
+  typedef FragmentElement_ FragmentElement;
+
+  /// Specifies dimension in which post-increment accesses advance.
+  static IteratorAdvance::Kind const kAdvance = Advance_;
+
+  /// Specifies iterator storage fragment type (Scalar or WmmaMatrix)
+  static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_;
+
+  /// Source or destination memory space
+  static MemorySpace::Kind const kMemorySpace = MemorySpace;
+
+  /// Index type
+  typedef Index_ Index;
+
+  /// Skew quantity
+  typedef Skew_ Skew;
+
+  /// Tile shape
+  typedef typename Traits::Tile Tile;
+
+  /// Distance along each dimension
+  typedef typename Traits::Delta Delta;
+
+  /// The strides in each dimension between different loads/stores.
+  typedef typename Traits::ImmediateOffsetStrides ImmediateOffsetStrides;
+
+  /// Iterations
+  typedef typename Traits::Iterations Iterations;
+
+  /// Thread offset
+  typedef typename Traits::ThreadOffset ThreadOffset;
+
+  /// The number of scalars accessed per load/store.
+  static int const kAccessSize = Tile::kC;
+
+  /// The elements loaded/stored by one instruction.
+  typedef typename Vectorize::Type AccessType;
+
+  /// The size of storage needed per fragment
+  static int const kFragmentSize =
+      (kIteratorFragment == IteratorFragment::kWmmaMatrix ? 16 : sizeof(AccessType));
+  /// The storage.
+  typedef Fragment::kCount, kFragmentSize> Storage;
+  /// The fragment.
+  typedef Fragment::kCount * kAccessSize> Fragment;
+  /// The fragment iterator.
+  typedef FragmentIterator FragmentIterator;
+  /// The fragment const iterator.
+  typedef FragmentConstIterator FragmentConstIterator;
+  /// The shape of the fragment.
+  typedef typename FragmentIterator::FragmentShape FragmentShape;
+
+  /// Default predicate mask type
+  typedef PredicateVector::kCount> PredicateVector;
+
+  //
+  // Params struct
+  //
+
+  /// Parameters to the iterator: strides of the underlying memory and the pointer increments
+  /// applied while stepping through a tile. Every initialize() overload returns 0.
+  struct Params {
+    Index stride_d;
+    Index stride_h;
+    Index stride_w;
+
+    Index inc_d;
+    Index inc_h;
+    Index inc_w;
+
+    Index inc_advance;
+
+    /// Initializes params by copying every stride and increment from the arguments
+    CUTLASS_HOST_DEVICE
+    int initialize(Index _stride_d,
+                   Index _stride_h,
+                   Index _stride_w,
+                   Index _inc_d,
+                   Index _inc_h,
+                   Index _inc_w,
+                   Index _inc_advance) {
+      stride_d = _stride_d;
+      stride_h = _stride_h;
+      stride_w = _stride_w;
+
+      inc_d = _inc_d;
+      inc_h = _inc_h;
+      inc_w = _inc_w;
+      inc_advance = _inc_advance;
+
+      return 0;
+    }
+
+    /// Initializes the strides and derives the increments from Delta, Iterations and Tile;
+    /// inc_d depends on kAdvance and inc_advance is set to 0.
+    CUTLASS_HOST_DEVICE
+    int initialize(Index _stride_d, Index _stride_h, Index _stride_w) {
+      stride_d = _stride_d;
+      stride_h = _stride_h;
+      stride_w = _stride_w;
+
+      inc_w = stride_w * Delta::kW;
+      // One step in H, minus the (Iterations::kW - 1) steps already taken in W.
+      inc_h = stride_h * Delta::kH - stride_w * Delta::kW * (Iterations::kW - 1);
+
+      if (kAdvance == IteratorAdvance::kH) {
+        // Advance in the H dimension.
+        inc_d = 0;
+      } else if (kAdvance == IteratorAdvance::kW) {
+        // Advance in the W dimension.
+        inc_d = stride_w * Tile::kW - stride_h * Tile::kH;
+      } else {
+        // Advance in the D dimension.
+        inc_d = stride_d;
+      }
+
+      inc_advance = 0;
+
+      return 0;
+    }
+
+    /// Initializes params to defaults: stride_w is 1, every other stride and increment is 0
+    CUTLASS_HOST_DEVICE int initialize() {
+      stride_d = 0;
+      stride_h = 0;
+      stride_w = 1;
+
+      inc_d = inc_h = inc_w = inc_advance = 0;
+
+      return 0;
+    }
+  };
+
+  /// Is the iterator valid? This base implementation returns true for every coordinate.
+  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
+
+  //
+  // Static function members
+  //
+
+  /// Initializes a predicate vector: the predicate for access (d, h, w) is set when the access
+  /// coordinate plus `offset` is below `bounds` in all three dimensions. The W coordinate is
+  /// additionally scaled by Tile::kC.
+  template
+  CUTLASS_DEVICE static void initialize_predicates(PredicateIterator predicate_it,
+                                                   Coord<3> const &bounds,
+                                                   Coord<3> const &offset = make_Coord(0, 0, 0)) {
+    for (int d = 0; d < Iterations::kD; ++d) {
+      bool enable_d = (d * Delta::kD + offset[0] < bounds[0]);
+      for (int h = 0; h < Iterations::kH; ++h) {
+        bool enable_h = (h * Delta::kH + offset[1] < bounds[1]);
+        for (int w = 0; w < Iterations::kW; ++w) {
+          bool enable_w = (w * Tile::kC * Delta::kW + offset[2] < bounds[2]);
+          predicate_it.set(d, h, w, 0, enable_d && enable_h && enable_w);
+        }
+      }
+    }
+  }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!@defgroup tile_load_iterator_concept Tile Load Iterator Concept
+@{
+
+\ref tile_load_iterator_concept enables loading a tile from addressable memory into a fragment
+
+@par Tile Load Iterator Concept
+    Types satisfying \ref tile_load_iterator_concept define the following members
+  - PredicateVector - a \ref predicate_vector_concept with sufficient predicate storage for
+each access implied by the tile traits
+  - Fragment - the destination fragment type satisfying \ref fragment_concept
+  - initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate
+vector according to externally specified bounds
+  - load_post_increment(fragment, pred_it) - a method that loads a fragment and increments
+the iterator to the next tile, guarded by a \ref predicate_iterator_concept
+  - load_post_increment(fragment) - a method that loads a fragment and increments the
+iterator to the next tile
+  - load(fragment, pred_it) - a const method that loads a fragment, guarded by a \ref
+predicate_iterator_concept
+  - load(fragment) - a const method that loads a fragment
+
+@}
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief An iterator implementing \ref tile_load_iterator_concept for loading a tile from memory
+* @concept{tile_load_iterator_concept}
+*/
+template >
+struct TileLoadIterator : public TileIteratorBase {
+  /// Base class
+  typedef TileIteratorBase
+      Base;
+
+  /// concept TileTraits
+  typedef typename Base::Traits Traits;
+
+  /// Scalar element
+  typedef typename Base::Scalar Scalar;
+
+  /// Fragment element
+  typedef typename Base::FragmentElement FragmentElement;
+
+  /// Specifies in which dimension post-increment accesses advance.
+  static IteratorAdvance::Kind const kAdvance = Base::kAdvance;
+
+  /// Specifies type of iterator fragment storage (Scalar or WmmaMatrix)
+  static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment;
+
+  /// Source or destination memory space
+  static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace;
+
+  /// Index type
+  typedef typename Base::Index Index;
+
+  /// Skew quantity
+  typedef typename Base::Skew Skew;
+
+  /// Tile shape
+  typedef typename Base::Tile Tile;
+
+  /// Delta
+  typedef typename Base::Delta Delta;
+
+  /// Iterations
+  typedef typename Base::Iterations Iterations;
+
+  /// ThreadOffset functor
+  typedef typename Base::ThreadOffset ThreadOffset;
+
+  /// Fragment type
+  typedef typename Base::FragmentShape FragmentShape;
+
+  /// Memory access type
+  typedef typename Base::AccessType AccessType;
+
+  /// Fragment definition
+  typedef typename Base::Fragment Fragment;
+
+  /// Fragment iterator definition
+  typedef typename Base::FragmentIterator FragmentIterator;
+
+  /// Fragment const iterator definition
+  typedef typename Base::FragmentConstIterator FragmentConstIterator;
+
+  /// Default predicate mask type
+  typedef typename Base::PredicateVector PredicateVector;
+
+  /// Storage object that may be loaded from
+  typedef typename Base::Storage SharedStorage;
+
+  /// IteratorBase parameters
+  typedef typename Base::Params BaseParams;
+
+  /// Do we require a fence? Set exactly when Tile::kD == 1.
+  enum { kRequiresLoadFence = Tile::kD == 1 };
+
+  /// The pointer type
+  typedef Scalar const *Pointer;
+
+  /// Parameters: the base strides/increments plus the pointer to load from
+  struct Params : public BaseParams {
+    /// Pointer to memory
+    Scalar const *pointer;
+
+    /// Initialize params to access storage object; only the pointer is set by this overload
+    CUTLASS_HOST_DEVICE
+    int initialize(SharedStorage const &storage) {
+      pointer = &storage[0];
+      return 0;
+    }
+
+    /// Initializes params to access a raw pointer; increments are derived by the base class
+    CUTLASS_HOST_DEVICE
+    int initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w) {
+      Base::Params::initialize(stride_d, stride_h, stride_w);
+      pointer = ptr;
+      return 0;
+    }
+
+    /// Initializes params from a raw pointer and explicitly given strides and increments
+    CUTLASS_HOST_DEVICE
+    int initialize(Scalar const *ptr,
+                   Index _stride_d,
+                   Index _stride_h,
+                   Index _stride_w,
+                   Index _inc_d,
+                   Index _inc_h,
+                   Index _inc_w,
+                   Index _inc_advance) {
+      pointer = ptr;
+      Base::Params::initialize(
+          _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance);
+      return 0;
+    }
+
+    // Initializes the base params to default values; the pointer is left untouched
+    CUTLASS_HOST_DEVICE
+    int initialize() { return Base::Params::initialize(); }
+  };
+
+  //
+  // Data members
+  //
+
+  /// Parameters structure
+  Params params;
+
+  /// Offset of an individual lane from the start of the tile
+  Coord<4> thread_offset;
+
+  /// Stage argument enables wrapping after some number of tiles have been loaded.
+  int stage;
+
+  //
+  // Predicate initialization (a non-static member: it reads this iterator's thread_offset)
+  //
+
+  /// Initializes a predicate vector by forwarding to Base::initialize_predicates with
+  /// block_offset shifted by (0, thread_offset[1], thread_offset[2] * Tile::kC)
+  template
+  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
+                                                 Coord<3> const &bounds,
+                                                 Coord<3> const &block_offset = make_Coord(0,
+                                                                                           0,
+                                                                                           0)) {
+    Base::initialize_predicates(
+        predicate_it,
+        bounds,
+        block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC));
+  }
+
+  //
+  // Methods
+  //
+
+  /// Default constructor
+  CUTLASS_HOST_DEVICE
+  TileLoadIterator() {}
+
+  /// Constructs a tile load iterator and moves the pointer to this thread's first access.
+  /// block_offset[1] and block_offset[2] are taken as (h, w) when kAdvance is kH and as (w, h)
+  /// otherwise.
+  CUTLASS_HOST_DEVICE
+  TileLoadIterator(Params const &_params,
+                   Coord<3> const &block_offset = make_Coord(0, 0, 0),
+                   ThreadOffset thread_offset_func = ThreadOffset())
+      : params(_params), stage(0) {
+    thread_offset = thread_offset_func();
+
+    Index block_offset_h = 0;
+    Index block_offset_w = 0;
+    if (kAdvance == IteratorAdvance::kH) {
+      block_offset_h = block_offset[1];
+      block_offset_w = block_offset[2];
+    } else {
+      block_offset_h = block_offset[2];
+      block_offset_w = block_offset[1];
+    }
+
+    params.pointer += block_offset[0] * params.stride_d +
+                      (block_offset_h + thread_offset[1]) * params.stride_h +
+                      (block_offset_w + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w;
+  }
+
+  /// Constructs a tile load iterator over a storage object. The Params argument and
+  /// block_offset are not used: only params.pointer is set, to the storage element at index
+  /// thread_offset_func()[2].
+  CUTLASS_HOST_DEVICE
+  TileLoadIterator(Params const &,
+                   SharedStorage &shared_storage,
+                   Coord<3> const &block_offset = make_Coord(0, 0, 0),
+                   ThreadOffset thread_offset_func = ThreadOffset())
+      : stage(0) {
+    int const offset = thread_offset_func()[2];
+    params.pointer = &shared_storage[offset];
+  }
+
+  /// Returns the current pointer
+  CUTLASS_HOST_DEVICE
+  Scalar const *data() const { return params.pointer; }
+
+  /// Increment in the D dimension
+  CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; }
+
+  /// Increment in the H dimension
+  CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; }
+
+  /// Increment in the W dimension
+  CUTLASS_HOST_DEVICE void inc_w() { params.pointer += params.inc_w; }
+
+  /// Increment in the next dimension
+  CUTLASS_HOST_DEVICE void inc_advance() { params.pointer += params.inc_advance; }
+
+  /// Increment the stage: moves the pointer forward by one stage (Tile::kH * Tile::kW * Tile::kC
+  /// scalars) and wraps back to the first stage after the last one. Does nothing when
+  /// Tile::kD is 1.
+  CUTLASS_DEVICE void inc_stage() {
+    if (Tile::kD > 1) {
+      int const kStageSize = Tile::kH * Tile::kW * Tile::kC;
+      if (stage == Tile::kD - 1) {
+        params.pointer -= (Tile::kD - 1) * kStageSize;
+        stage = 0;
+      } else {
+        params.pointer += kStageSize;
+        stage = stage + 1;
+      }
+    }
+  }
+
+ public:
+  /// Loads a fragment and advances the iterator to the next tile. Accesses whose predicate is
+  /// false are skipped, but the pointer is still advanced past them.
+  template
+  CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) {
+    FragmentIterator frag_iterator(fragment);
+
+    for (int d = 0; d < Iterations::kD; ++d) {
+      for (int h = 0; h < Iterations::kH; ++h) {
+        for (int w = 0; w < Iterations::kW; ++w, ++pred_it) {
+          if (*pred_it) {
+            Load::load(
+                reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0);
+          }
+
+          if (w < Iterations::kW - 1) {
+            inc_w();
+          }
+        }
+        if (h < Iterations::kH - 1) {
+          inc_h();
+        }
+      }
+      if (d < Iterations::kD - 1) {
+        inc_d();
+      }
+    }
+    inc_advance();
+  }
+
+  /// Loads a fragment and advances the iterator to the next tile, using the predicate vector's
+  /// TrivialIterator as the guard.
+  template
+  CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment) {
+    typename PredicateVector::TrivialIterator pred_it;
+    load_post_increment(fragment, pred_it);
+  }
+
+  /// Loads a fragment without advancing the iterator (the load runs on a copy of *this).
+  template
+  CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const {
+    TileLoadIterator _load_it(*this);
+    _load_it.load_post_increment(fragment, pred_it);
+  }
+
+  /// Loads a fragment without advancing the iterator.
+  template
+  CUTLASS_HOST_DEVICE void load(Fragment &fragment) const {
+    typename PredicateVector::TrivialIterator pred_it;
+    load(fragment, pred_it);
+  }
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/*!@defgroup tile_store_iterator_concept Tile Store Iterator Concept
+@{
+
+\ref tile_store_iterator_concept enables storing a tile to addressable memory
+
+@par Tile Store Iterator Concept
+    Types satisfying \ref tile_store_iterator_concept define the following members
+  - PredicateVector - a \ref predicate_vector_concept with sufficient predicate storage for
+each access implied by the tile traits
+  - Fragment - the source fragment type satisfying \ref fragment_concept
+  - initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate
+vector according to externally specified bounds
+  - store_post_increment(fragment, pred_it) - a method that stores a fragment and increments
+the iterator to the next tile, guarded by a \ref predicate_iterator_concept
+  - store_post_increment(fragment) - a method that stores a fragment and increments the
+iterator to the next tile
+  - store(fragment, pred_it) - a const method that stores a fragment, guarded by a \ref
+predicate_iterator_concept
+  - store(fragment) - a method that stores a fragment
+
+@}
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+* @brief An iterator implementing \ref tile_store_iterator_concept for storing a tile to memory
+* @concept{tile_store_iterator_concept}
+*/
+template >
+struct TileStoreIterator : public TileIteratorBase {
+  /// Base class
+  typedef TileIteratorBase
+      Base;
+
+  /// concept TileTraits
+  typedef typename Base::Traits Traits;
+
+  /// Scalar element
+  typedef typename Base::Scalar Scalar;
+
+  /// Fragment element
+  typedef typename Base::FragmentElement FragmentElement;
+
+  /// Specifies in which dimension post-increment accesses
advance. + static IteratorAdvance::Kind const kAdvance = Base::kAdvance; + + /// Specifies type of iterator fragment storage (Scalar or WmmaMatrix) + static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment; + + /// Source or destination memory space + static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace; + + /// Index type + typedef typename Base::Index Index; + + /// Skew quantity + typedef typename Base::Skew Skew; + + /// Tile shape + typedef typename Base::Tile Tile; + + /// Delta + typedef typename Base::Delta Delta; + + /// Iterations + typedef typename Base::Iterations Iterations; + + /// ThreadOffset functor + typedef typename Base::ThreadOffset ThreadOffset; + + /// Fragment type + typedef typename Base::FragmentShape FragmentShape; + + /// Memory access type + typedef typename Base::AccessType AccessType; + + /// Fragment definition + typedef typename Base::Fragment Fragment; + + /// Fragment iterator definition + typedef typename Base::FragmentIterator FragmentIterator; + + /// Fragment const iterator definition + typedef typename Base::FragmentConstIterator FragmentConstIterator; + + /// Default predicate mask type + typedef typename Base::PredicateVector PredicateVector; + + /// Storage object which may be stored to + typedef typename Base::Storage SharedStorage; + + /// IteratorBase parameters + typedef typename Base::Params BaseParams; + + /// Parameters + struct Params : public BaseParams { + /// Pointer to memory + Scalar *pointer; + + /// Initialize params to access storage object + CUTLASS_HOST_DEVICE + int initialize(SharedStorage &storage) { + pointer = &storage[0]; + return 0; + } + + /// Initializes params to access a raw pointer + CUTLASS_HOST_DEVICE + int initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w) { + Base::Params::initialize(stride_d, stride_h, stride_w); + pointer = ptr; + return 0; + } + + /// Initializes params + CUTLASS_HOST_DEVICE + int initialize(Scalar *ptr, + Index
_stride_d, + Index _stride_h, + Index _stride_w, + Index _inc_d, + Index _inc_h, + Index _inc_w, + Index _inc_advance) { + pointer = ptr; + Base::Params::initialize( + _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance); + return 0; + } + + /// Initializes params to default values + CUTLASS_HOST_DEVICE + int initialize() { return Base::Params::initialize(); } + }; + + // + // Data members + // + + /// Parameters structure + Params params; + + /// Offset of an individual lane from the start of the tile + Coord<4> thread_offset; + + /// The stage. + int stage; + + // + // Static member functions + // + + /// Initializes a predicate vector + template + CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, + Coord<3> const &bounds, + Coord<3> const &block_offset = make_Coord(0, + 0, + 0)) { + Base::initialize_predicates( + predicate_it, + bounds, + block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC)); + } + + // + // Methods + // + + /// Default constructor + CUTLASS_HOST_DEVICE + TileStoreIterator() {} + + /// Constructs a tile store iterator + CUTLASS_HOST_DEVICE + TileStoreIterator(Params const &_params, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : params(_params), stage(0) { + thread_offset = thread_offset_func(); + + params.pointer += block_offset[0] * params.stride_d + + (block_offset[1] + thread_offset[1]) * params.stride_h + + (block_offset[2] + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w; + } + + /// Constructs a tile store iterator + CUTLASS_HOST_DEVICE + TileStoreIterator(Params const &, + SharedStorage &shared_storage, + Coord<3> const &block_offset = make_Coord(0, 0, 0), + ThreadOffset thread_offset_func = ThreadOffset()) + : stage(0) { + int const offset = thread_offset_func()[2]; + params.pointer = &shared_storage[offset]; + } + + /// Returns the current pointer + CUTLASS_HOST_DEVICE + Scalar *data() const { 
return params.pointer; } + + /// Increment in the D dimension + CUTLASS_HOST_DEVICE void inc_d() { params.pointer += params.inc_d; } + + /// Increment in the H dimension + CUTLASS_HOST_DEVICE void inc_h() { params.pointer += params.inc_h; } + + /// Increment in the W dimension + CUTLASS_HOST_DEVICE void inc_w() { params.pointer += params.inc_w; } + + /// Increment in the next dimension + CUTLASS_HOST_DEVICE void inc_advance() {} + + /// Increment the stage. + CUTLASS_DEVICE void inc_stage() { + if (Tile::kD > 1) { + int const kStageSize = Tile::kH * Tile::kW * Tile::kC; + if (stage == Tile::kD - 1) { + params.pointer -= (Tile::kD - 1) * kStageSize; + stage = 0; + } else { + params.pointer += kStageSize; + stage = stage + 1; + } + } + } + + public: + /// Stores a fragment and advances to the next tile. + template + CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it) { + FragmentIterator frag_iterator(fragment); + + for (int d = 0; d < Iterations::kD; ++d) { + for (int h = 0; h < Iterations::kH; ++h) { + for (int w = 0; w < Iterations::kW; ++w, ++pred_it) { + if (*pred_it) { + Store::store( + reinterpret_cast(frag_iterator.at(d, h, w, 0)), data(), 0); + } + if (w < Iterations::kW - 1) { + inc_w(); + } + } + if (h < Iterations::kH - 1) { + inc_h(); + } + } + if (d < Iterations::kD - 1) { + inc_d(); + } + } + inc_advance(); + } + + /// Stores a fragment and advances to the next tile. + template + CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment) { + typename PredicateVector::TrivialIterator pred_it; + store_post_increment(fragment, pred_it); + } + + /// Stores a fragment without advancing the iterator. + template + CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const { + TileStoreIterator _store_it(*this); + _store_it.store_post_increment(fragment, pred_it); + } + + /// Stores a fragment without advancing the iterator. 
+ template + CUTLASS_HOST_DEVICE void store(Fragment &fragment) const { + typename PredicateVector::TrivialIterator pred_it; + store(fragment, pred_it); + } +}; +} diff --git a/cutlass/tile_traits_standard.h b/cutlass/tile_traits_standard.h new file mode 100644 index 0000000000..14ecd01abc --- /dev/null +++ b/cutlass/tile_traits_standard.h @@ -0,0 +1,238 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Defines tile traits for several tile partitioning arrangements of threads expected to + achieve efficient streaming performance. +*/ +#pragma once + +#include + +namespace cutlass { + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Basic thread offset function computed from a thread shape +template +struct TiledThreadOffset { + /// Computes the logical coordinate from thread shape + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + Coord<4> thread_offset; + + int index = threadIdx.x; + + thread_offset[3] = (index % ThreadShape::kC); + index = (index / ThreadShape::kC); + + thread_offset[2] = (index % ThreadShape::kW); + index = (index / ThreadShape::kW); + + thread_offset[1] = (index % ThreadShape::kH); + index = (index / ThreadShape::kH); + + thread_offset[0] = index; + + return thread_offset; + } +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which the number of threads is greater than the +/// contiguous dimension of the tile. 
+template +struct TileTraitsStrideMajor { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(Tile::kW <= kThreads, + "This specialization assumes there are more threads than the contiguous dimension " + "of the tile."); + + /// Shape of threads + typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> ThreadShape; + + /// Delta along each dimension + typedef Shape<1, ThreadShape::kH, 1, 1> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> Iterations; + + /// Computes the initial offset + typedef TiledThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which the number of threads is fewer than the tile size +/// in the contiguous dimension. 
+template +struct TileTraitsContiguousMajor { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + // Static assertions + static_assert(Tile::kW >= kThreads, + "This specialization assumes there are more threads than the contiguous dimension " + "of the tile."); + + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(!(Tile::kW % kThreads), + "The contiguous size of the tile must be divisible by the number of threads."); + + /// Thread shape + typedef Shape<1, 1, kThreads> ThreadShape; + + /// Delta between each thread's access + typedef Shape<1, 1, kThreads> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH, Tile::kW / kThreads> Iterations; + + /// Computes the initial offset + typedef TiledThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Tiling in which warps rake across the contiguous dimension +template +struct TileTraitsWarpRake { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + /// Hard-coded warp size + static int const kWarpSize = 32; + + /// Number of participating warps + static int const kWarpCount = kThreads / kWarpSize; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + static_assert(!(kThreads % kWarpSize), "Number of threads must be divisible by the warp size."); + + static_assert(!(Tile::kW % kWarpSize), "Contiguous dimension must be divisible by the warp size"); + + /// Warps strip-mined across strided dimension + static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH); + + /// Warps stripmined contiguous dimension + static int const kWarpsContiguous = kWarpCount / kWarpsStrided; + + /// Arrangement of threads + typedef Shape<1, 
kWarpsStrided, kWarpsContiguous * kWarpSize> ThreadShape; + + /// The same warp rakes along the contiguous dimension + typedef Shape<1, kWarpsStrided, kWarpSize> Delta; + + /// Number of iterations + typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> Iterations; + + /// Computes the thread offset in (H, W) based on thread ID + struct ThreadOffset { + /// Basic thread offset function computed from a thread shape + CUTLASS_HOST_DEVICE + Coord<4> operator()() const { + int tid = threadIdx.x; + int warp = (tid / kWarpSize); + int lane = (tid % kWarpSize); + + static int const kWarpSpanContiguous = kWarpSize * Iterations::kW; + + int warp_w = (warp % kWarpsContiguous); + int warp_h = (warp / kWarpsContiguous); + + return make_Coord(0, warp_h, lane + kWarpSpanContiguous * warp_w, 0); + } + }; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Chooses 'best' shape to enable warp raking along contiguous dimension if possible. +template +struct TileTraitsStandard { + /// Shape of tile + typedef Tile_ Tile; + + /// Number of participating threads + static int const kThreads = Threads; + + /// Hard-coded warp size + static int const kWarpSize = 32; + + /// Number of participating warps + static int const kWarpCount = kThreads / kWarpSize; + + // Static assertions + static_assert(!(ShapeCount::kDhw % kThreads), + "Tiling undefined if elements not divisible by threads."); + + /// Choose the stride-major contiguous tiling if the contiguous dimension is + /// smaller than the warp size. Otherwise, if it is divisible by the warp size, + /// choose the warp rake arrangement. 
+ typedef typename platform::conditional < + Tile::kW, + typename platform::conditional, + TileTraitsContiguousMajor >::type>:: + type Traits; + + /// Delta between accesses + typedef typename Traits::Delta Delta; + + /// Delta between each thread's access + /// TODO MTA this is wrong for sure, but Delta is used for stride computation at the moment + typedef Delta ImmediateOffsetStrides; + + /// Number of accesses + typedef typename Traits::Iterations Iterations; + + /// Thread offset functor + typedef typename Traits::ThreadOffset ThreadOffset; +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/util/cutlass_math.h b/cutlass/util/cutlass_math.h new file mode 100644 index 0000000000..0ecdc43829 --- /dev/null +++ b/cutlass/util/cutlass_math.h @@ -0,0 +1,131 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +/** + * \file + * \brief Math utilities + */ + +#include + +namespace cutlass { + +/****************************************************************************** + * Static math utilities + ******************************************************************************/ + +/** + * Statically determine if N is a power-of-two + */ +template +struct is_pow2 : platform::integral_constant {}; + +/** + * Statically determine log2(N), rounded down + */ +template +struct log2_down { + /// Static logarithm value + enum { value = log2_down> 1), Count + 1>::value }; +}; + +// Base case +template +struct log2_down { + enum { value = Count }; +}; + +/** + * Statically determine log2(N), rounded up + */ +template +struct log2_up { + /// Static logarithm value + enum { value = log2_up> 1), Count + 1>::value }; +}; + +// Base case +template +struct log2_up { + enum { value = ((1 << Count) < N) ? 
Count + 1 : Count }; +}; + +/** + * Statically estimate sqrt(N) to the nearest power-of-two + */ +template +struct sqrt_est { + enum { value = 1 << (log2_up::value / 2) }; +}; + +/** + * For performing a constant-division with a compile-time assertion that the + * Divisor evenly-divides the Dividend. + */ +template +struct divide_assert { + enum { value = Dividend / Divisor }; + + static_assert((Dividend % Divisor == 0), "Not an even multiple"); +}; + +/****************************************************************************** + * Rounding + ******************************************************************************/ + +/** + * Round dividend up to the nearest multiple of divisor + */ +template +CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) { + return ((dividend + divisor - 1) / divisor) * divisor; +} + +/** + * Greatest common divisor + */ +template +CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) { + for (;;) { + if (a == 0) return b; + b %= a; + if (b == 0) return a; + a %= b; + } +} + +/** + * Least common multiple + */ +template +CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) { + value_t temp = gcd(a, b); + + return temp ? (a / temp * b) : 0; +} + +} // namespace cutlass diff --git a/cutlass/util/debug.h b/cutlass/util/debug.h index 2aedd17adc..6055e3fcc6 100644 --- a/cutlass/util/debug.h +++ b/cutlass/util/debug.h @@ -1,29 +1,27 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
* - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - ******************************************************************************/ + **************************************************************************************************/ #pragma once @@ -44,87 +42,81 @@ namespace cutlass { * Formats and prints the given message to stdout */ #if !defined(CUDA_LOG) - #if !defined(__CUDA_ARCH__) - #define CUDA_LOG(format, ...) 
printf(format, __VA_ARGS__) - #else -inline __host__ __device__ unsigned get_threadidx_x() { return threadIdx.x; } -inline __host__ __device__ unsigned get_threadidx_y() { return threadIdx.y; } -inline __host__ __device__ unsigned get_threadidx_z() { return threadIdx.z; } -inline __host__ __device__ unsigned get_blockidx_x() { return blockIdx.x; } -inline __host__ __device__ unsigned get_blockidx_y() { return blockIdx.y; } -inline __host__ __device__ unsigned get_blockidx_z() { return blockIdx.z; } - #define CUDA_LOG(format, ...) \ - printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \ - get_blockidx_x(), get_blockidx_y(), get_blockidx_z(), \ - get_threadidx_x(), get_threadidx_y(), get_threadidx_z(), \ - __VA_ARGS__); - #endif +#if !defined(__CUDA_ARCH__) +#define CUDA_LOG(format, ...) printf(format, __VA_ARGS__) +#else +#define CUDA_LOG(format, ...) \ + printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + __VA_ARGS__); +#endif #endif - /** * Formats and prints the given message to stdout only if DEBUG is defined */ #if !defined(CUDA_LOG_DEBUG) - #ifdef DEBUG - #define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__) - #else - #define CUDA_LOG_DEBUG(format, ...) - #endif +#ifdef DEBUG +#define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__) +#else +#define CUDA_LOG_DEBUG(format, ...) +#endif #endif - /** - * \brief The corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context. + * \brief The corresponding error message is printed to \p stderr (or \p stdout in device code) + * along with the supplied source context. * * \return The CUDA error. 
*/ -__host__ __device__ inline cudaError_t cuda_perror_impl( - cudaError_t error, - const char* filename, - int line) -{ - (void)filename; - (void)line; - if (error) - { +__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, + const char* filename, + int line) { + (void)filename; + (void)line; + if (error) { #if !defined(__CUDA_ARCH__) - fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); - fflush(stderr); + fprintf( + stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error)); + fflush(stderr); #else - printf("CUDA error %d [%s, %d]\n", error, filename, line); + printf("CUDA error %d [%s, %d]\n", error, filename, line); #endif - } - return error; + } + return error; } - /** * \brief Perror macro */ #ifndef CUDA_PERROR - #define CUDA_PERROR(e) cuda_perror_impl((cudaError_t) (e), __FILE__, __LINE__) +#define CUDA_PERROR(e) cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__) #endif - /** * \brief Perror macro with exit */ #ifndef CUDA_PERROR_EXIT - #define CUDA_PERROR_EXIT(e) if (cuda_perror_impl((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } +#define CUDA_PERROR_EXIT(e) \ + if (cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) { \ + exit(1); \ + } #endif - /** * \brief Perror macro only if DEBUG is defined */ #ifndef CUDA_PERROR_DEBUG - #ifdef DEBUG - #define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e) - #else - #define CUDA_PERROR_DEBUG(e) (e) - #endif +#ifdef DEBUG +#define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e) +#else +#define CUDA_PERROR_DEBUG(e) (e) +#endif #endif - -} // namespace cutlass +} // namespace cutlass diff --git a/cutlass/util/device_introspection.h b/cutlass/util/device_introspection.h deleted file mode 100644 index b4946e0c50..0000000000 --- a/cutlass/util/device_introspection.h +++ /dev/null @@ -1,224 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Utilities for device introspection - */ - -#include "debug.h" -#include "nv_std.h" -#include "printable.h" - -namespace cutlass { - - -/****************************************************************************** - * math_operation_class_t - * - * Enumeration to select the appropriate math operation - * - * The assumption is multiple math operations may be used to compute GEMM - * for a given selection of operand and accumulator types. - * - ******************************************************************************/ - -/// Math operation -enum class math_operation_class_t -{ - scalar, // scalar (and vector) multiply-accumulate operations - matrix // Volta tensor operations -}; - -/****************************************************************************** - * arch_family_t - ******************************************************************************/ - -/** - * \brief Enumeration of NVIDIA GPU architectural families - */ -struct arch_family_t -{ - /// \brief Enumerants - enum kind_t - { - Unsupported = 0, - Kepler = 3, - Maxwell = 5, - Volta = 7, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - arch_family_t() : kind(Unsupported) {} - - /// Copy constructor - arch_family_t(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case Kepler: return "Kepler"; - case Maxwell: return "Maxwell"; - case Volta: return "Volta"; - case Unsupported: - default: return "Unsupported"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } - -}; - - -/** - * Macro for architecture targeted by the current compiler pass - */ -#if defined(__CUDA_ARCH__) - 
#define CUTLASS_ARCH __CUDA_ARCH__ -#else - #define CUTLASS_ARCH 0 -#endif - - -/** - * Macro for architecture family targeted by the current compiler pass - */ -#define CUTLASS_ARCH_FAMILY \ - ( \ - (CUTLASS_ARCH < 300) ? \ - arch_family_t::Unsupported : \ - (CUTLASS_ARCH < 500) ? \ - arch_family_t::Kepler : \ - (CUTLASS_ARCH < 700) ? \ - arch_family_t::Maxwell : \ - arch_family_t::Volta \ - ) - - - - -/****************************************************************************** - * Device introspection - ******************************************************************************/ - -/** - * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device - */ -template -__global__ void empty_kernel(void) { } - - - -/** - * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) - */ -cudaError_t ptx_version(int &version) -{ - struct Dummy - { - /// Type definition of the empty_kernel kernel entry point - typedef void (*EmptyKernelPtr)(); - - /// Force empty_kernel to be generated if this class is used - EmptyKernelPtr Empty() - { - return empty_kernel; - } - }; - - cudaError_t error = cudaSuccess; - do - { - cudaFuncAttributes empty_kernel_attrs; - if (CUDA_PERROR_DEBUG(error = cudaFuncGetAttributes(&empty_kernel_attrs, empty_kernel))) break; - version = empty_kernel_attrs.ptxVersion * 10; - } - while (0); - - return error; -} - - -/** - * \brief Retrieves the SM version (major * 100 + minor * 10) for the current device - */ -cudaError_t get_sm_version(int &sm_version) -{ - cudaError_t error = cudaSuccess; - - // Get device ordinal - int device_ordinal; - if (CUDA_PERROR_DEBUG(error = cudaGetDevice(&device_ordinal))) - return error; - - // Fill in SM version - int major, minor; - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) - return error; - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute(&minor, 
cudaDevAttrComputeCapabilityMinor, device_ordinal))) - return error; - sm_version = major * 100 + minor * 10; - - return error; -} - - -/** - * \brief Retrieves the count for the current device - */ -cudaError_t get_sm_count(int &sm_count) -{ - cudaError_t error = cudaSuccess; - - // Get device ordinal - int device_ordinal; - if (CUDA_PERROR_DEBUG(error = cudaGetDevice(&device_ordinal))) - return error; - - // Get SM count - if (CUDA_PERROR_DEBUG(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) - return error; - - return error; -} - - -} // namespace cutlass - - diff --git a/cutlass/util/io_intrinsics.h b/cutlass/util/io_intrinsics.h deleted file mode 100644 index dca92da6b7..0000000000 --- a/cutlass/util/io_intrinsics.h +++ /dev/null @@ -1,492 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief I/O device intrinsics - */ - -#include -#include - -#include "nv_std.h" -#include "math.h" - -namespace cutlass { - - - - -/****************************************************************************** - * io_vector - ******************************************************************************/ - -/** - * Base aligned storage for IO vector - */ -template struct io_vector_base; -template struct __align__(1) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(2) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(4) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(8) io_vector_base { value_t buff[VectorItems]; }; -template struct __align__(16) io_vector_base { value_t buff[VectorItems]; }; - - -/** - * \brief Aligned vector type for coarsening data movement instructions - * - * Exposes the member constant \p VectorItems, the actual number of component - * values comprising the io_vector - */ -template < - typename value_t, ///< Component value type - int MaxVectorItems, ///< Maximum allowable component values - int MaxAlignBytes ///< Maximum allowable alignment - = __NV_STD_MIN(16, MaxVectorItems * sizeof(value_t)), - int AlignBytes ///< Actual alignment - = __NV_STD_MIN(sizeof(value_t) * MaxVectorItems, MaxAlignBytes), - int VectorItems ///< Actual number 
of component values - = divide_assert::value, - bool MustAlias ///< Whether we need to alias during loads/stores - = (VectorItems > 4)> -struct io_vector; - - -/** - * IO vector (specialization for VectorItems <= 4) - */ -template < - typename value_t, - int MaxVectorItems, - int MaxAlignBytes, - int _AlignBytes, - int _VectorItems> -struct io_vector < - value_t, - MaxVectorItems, - MaxAlignBytes, - _AlignBytes, - _VectorItems, - false> -: - io_vector_base -{ - enum - { - VectorItems = _VectorItems, - AlignBytes = _AlignBytes - }; - - static_assert(is_pow2::value, "I/O vector alignment must be a power-of-two."); - static_assert((AlignBytes <= 16), "I/O vector alignment must <= 16B."); - - inline __device__ - void load(const io_vector *ptr) - { - *this = *ptr; - } - - inline __device__ - void load(const value_t *ptr) - { - *this = *reinterpret_cast(ptr); - } - - - inline __device__ - void store(io_vector *ptr) const - { - *ptr = *this; - } - - inline __device__ - void store(value_t *ptr) const - { - *reinterpret_cast(ptr) = *this; - } -}; - - -/** - * IO vector (specialization for VectorItems > 4) - * - * NB: Workaround for NVCC not generating 128-bit loads/stores for aligned - * structures having component types < 32b - */ -template < - typename value_t, - int MaxVectorItems, - int MaxAlignBytes, - int _AlignBytes, - int _VectorItems> -struct io_vector < - value_t, - MaxVectorItems, - MaxAlignBytes, - _AlignBytes, - _VectorItems, - true> -: - io_vector_base -{ - enum - { - VectorItems = _VectorItems, - AlignBytes = _AlignBytes - }; - - static_assert(is_pow2::value, "I/O vector alignment must be a power-of-two."); - static_assert((AlignBytes <= 16), "I/O vector alignment must <= 16B."); - - typedef typename nv_std::conditional<(AlignBytes == 8), - uint2, // Use 8B load - uint4> // Use 16B load - ::type align_t; - - inline __device__ - void load(const io_vector *ptr) - { - *reinterpret_cast(this) = *reinterpret_cast(ptr); - } - - inline __device__ - void load(const 
value_t *ptr) - { - *reinterpret_cast(this) = *reinterpret_cast(ptr); - } - - - inline __device__ - void store(io_vector *ptr) const - { - *reinterpret_cast(ptr) = *reinterpret_cast(this); - } - - inline __device__ - void store(value_t *ptr) const - { - *reinterpret_cast(ptr) = *reinterpret_cast(this); - } - -}; - - - - - - - -/****************************************************************************** - * Macro expansions for vector loads - ******************************************************************************/ - -/** - * Define vector-4 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[4], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier".v4."#ptx_type" {%0, %1, %2, %3}, [%4];\n" \ - : \ - "="#val_constraint(dest[0]), \ - "="#val_constraint(dest[1]), \ - "="#val_constraint(dest[2]), \ - "="#val_constraint(dest[3]) \ - : \ - #ptr_constraint(ptr)); \ - } - -/** - * Define vector-2 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[2], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier".v2."#ptx_type" {%0, %1}, [%2];\n" \ - : \ - "="#val_constraint(dest[0]), \ - "="#val_constraint(dest[1]) \ - : \ - #ptr_constraint(ptr)); \ - } - - -/** - * Define vector-1 LD specialization for the given load modifier - */ -#define CUTLASS_LD_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - value_t (&dest)[1], \ - ptr_t ptr) \ - { \ - asm volatile ("ld."#load_modifier"."#ptx_type" %0, [%1];\n" \ - : \ - "="#val_constraint(dest[0]) \ - : \ - #ptr_constraint(ptr)); \ - } - - -/** - * Define powers-of-two vector LD specializations - */ 
-#define CUTLASS_LD_ALL(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_LD_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) - - -/****************************************************************************** - * Macro expansions for vector stores - ******************************************************************************/ - -/** - * Define vector-4 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V4(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[4]) \ - { \ - asm volatile ("st."#store_modifier".v4."#ptx_type" [%0], {%1, %2, %3, %4};\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0]), \ - #val_constraint(src[1]), \ - #val_constraint(src[2]), \ - #val_constraint(src[3])); \ - } - - -/** - * Define vector-2 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V2(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[2]) \ - { \ - asm volatile ("st."#store_modifier".v2."#ptx_type" [%0], {%1, %2};\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0]), \ - #val_constraint(src[1])); \ - } - -/** - * Define vector-1 ST specialization for the given load modifier - */ -#define CUTLASS_ST_V1(f_name, value_t, store_modifier, ptx_type, val_constraint, ptr_constraint) \ - template \ - inline __device__ \ - void f_name( \ - ptr_t ptr, \ - const value_t (&src)[1]) \ - { \ - asm volatile ("st."#store_modifier"."#ptx_type" [%0], %1;\n" \ - : : \ - #ptr_constraint(ptr), \ - #val_constraint(src[0])); \ - } - - -/** - * Define powers-of-two 
vector LD specializations - */ -#define CUTLASS_ST_ALL(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V4(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V2(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) \ - CUTLASS_ST_V1(f_name, value_t, load_modifier, ptx_type, val_constraint, ptr_constraint) - - - -/****************************************************************************** - * Macro expansions for vector IO - ******************************************************************************/ - -/** - * Define global and shared LD specializations - */ -#define CUTLASS_IO(value_t, ptx_type, val_constraint) \ - CUTLASS_LD_ALL(ldg_cg_internal, value_t, global.cg, ptx_type, val_constraint, l) \ - CUTLASS_ST_ALL(stg_cg_internal, value_t, global.cg, ptx_type, val_constraint, l) - - -// Define IO for useful types -CUTLASS_IO(double, f64, d) -CUTLASS_IO(float, f32, f) -CUTLASS_IO(int64_t, b64, l) -CUTLASS_IO(int32_t, b32, r) -CUTLASS_IO(int16_t, b16, h) - - -// Macro cleanup -#undef CUTLASS_IO -#undef CUTLASS_LD_ALL -#undef CUTLASS_LD_V4 -#undef CUTLASS_LD_V2 -#undef CUTLASS_LD_V1 -#undef CUTLASS_ST_ALL -#undef CUTLASS_ST_V4 -#undef CUTLASS_ST_V2 -#undef CUTLASS_ST_V1 - - -/****************************************************************************** - * I/O cast types - ******************************************************************************/ - -/// Provides the type for which to reinterpret-cast a given vector -template < - typename value_t, - int IoVecDim, - int ValueBytes = sizeof(value_t)> -struct io_cast -{ - typedef value_t type[IoVecDim]; -}; - - -/// Provides the type for which to reinterpret-cast a vector of 1B types -template < - typename value_t, - int IoVecDim> -struct io_cast -{ - typedef typename nv_std::conditional< - (IoVecDim < 2), - int8_t[1], // Use 8b load - typename nv_std::conditional< - (IoVecDim < 4), - int16_t[1], // Use 16b load - 
int32_t[IoVecDim / 4]>::type>::type // Use up to 128b load - type; -}; - - -/// Provides the type for which to reinterpret-cast a vector of 2B types -template < - typename value_t, - int IoVecDim> -struct io_cast -{ - typedef typename nv_std::conditional< - (IoVecDim < 2), - int16_t[1], // Use 16b load - int32_t[IoVecDim / 2]>::type // Use up to 128b load - type; -}; - - - -/****************************************************************************** - * ldg_cg intrinsics - ******************************************************************************/ - -/// Load from global (cache-global modifier) -template -inline __device__ -void ldg_cg( - value_t &dest, - ptr_t d_in) -{ - // Cast dest to a different array type if necessary - ldg_cg_internal( - reinterpret_cast::type &>(dest), - d_in); -} - -/// Load from global (cache-global modifier) -template -inline __device__ -void ldg_cg( - value_t (&dest)[IoVecDim], - ptr_t d_in) -{ - static_assert(is_pow2::value, "I/O vectors must be a power-of-two."); - - // Cast dest to a different array type if necessary - ldg_cg_internal( - reinterpret_cast::type &>(dest), - d_in); -} - - -/****************************************************************************** - * stg_cg intrinsics - ******************************************************************************/ - -/// Store to global (cache-global modifier) -template -inline __device__ -void stg_cg( - ptr_t dest, - const value_t &src) -{ - // Cast src to a different array type if necessary - stg_cg_internal( - dest, - reinterpret_cast::type &>(src)); -} - -/// Store to global (cache-global modifier) -template -inline __device__ -void stg_cg( - ptr_t dest, - const value_t (&src)[IoVecDim]) -{ - static_assert(is_pow2::value, "I/O vectors must be a power-of-two."); - - // Cast src to a different array type if necessary - stg_cg_internal( - dest, - reinterpret_cast::type &>(src)); -} - - - - - -} // namespace cutlass - diff --git a/cutlass/util/math.h b/cutlass/util/math.h 
deleted file mode 100644 index bddad6712e..0000000000 --- a/cutlass/util/math.h +++ /dev/null @@ -1,167 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Math utilities - */ - -#include "nv_std.h" - -namespace cutlass { - - -/****************************************************************************** - * Static math utilities - ******************************************************************************/ - -/** - * Statically determine if N is a power-of-two - */ -template -struct is_pow2 : nv_std::integral_constant -{}; - - - - - -/** - * Statically determine log2(N), rounded down - */ -template -struct log2_down -{ - /// Static logarithm value - enum { value = log2_down> 1), Count + 1>::value }; -}; - -// Base case -template -struct log2_down -{ - enum { value = Count }; -}; - - - - -/** - * Statically determine log2(N), rounded up - */ -template -struct log2_up -{ - /// Static logarithm value - enum { value = log2_up> 1), Count + 1>::value }; -}; - -// Base case -template -struct log2_up -{ - enum { value = ((1 << Count) < N) ? Count + 1 : Count }; -}; - - - -/** - * Statically estimate sqrt(N) to the nearest power-of-two - */ -template -struct sqrt_est -{ - enum { value = 1 << (log2_up::value / 2) }; -}; - - - -/** - * For performing a constant-division with a compile-time assertion that the - * Divisor evenly-divides the Dividend. 
- */ -template -struct divide_assert -{ - enum { value = Dividend / Divisor}; - - static_assert((Dividend % Divisor == 0), "Not an even multiple"); -}; - - - - - -/****************************************************************************** - * Rounding - ******************************************************************************/ - -/** - * Round dividend up to the nearest multiple of divisor - */ -template -inline __host__ __device__ -dividend_t round_nearest(dividend_t dividend, divisor_t divisor) -{ - return ((dividend + divisor - 1) / divisor) * divisor; -} - - -/** - * Greatest common divisor - */ -template -inline __host__ __device__ -value_t gcd(value_t a, value_t b) -{ - for (;;) - { - if (a == 0) return b; - b %= a; - if (b == 0) return a; - a %= b; - } -} - - -/** - * Least common multiple - */ -template -inline __host__ __device__ -value_t lcm(value_t a, value_t b) -{ - value_t temp = gcd(a, b); - - return temp ? (a / temp * b) : 0; -} - - -} // namespace cutlass - diff --git a/cutlass/util/matrix_transform.h b/cutlass/util/matrix_transform.h deleted file mode 100644 index f3341e92c7..0000000000 --- a/cutlass/util/matrix_transform.h +++ /dev/null @@ -1,102 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Enumeration of dense matrix view transformations - */ - -#include "printable.h" - -namespace cutlass { - - -/****************************************************************************** - * matrix_transform_t - ******************************************************************************/ - -/** - * \brief Enumeration of dense matrix view transformations - * - * These enumerators (and corresponding tag types) describe which view - * transformation needs to be applied prior to operation upon a given dense - * matrix. 
Its values correspond to Fortran characters 'n' (non-transpose), - * 't'(transpose) and 'c'(conjugate transpose) that are often - * used as parameters to legacy BLAS implementations - */ -struct matrix_transform_t : printable_t -{ - /// \brief Enumerants (same as CUBLAS) - enum kind_t - { - /// Invalid view - Invalid = -1, - - /// Non-transpose view - NonTranspose = 0, - - /// Transpose view - Transpose = 1, - - /// Conjugate transpose view - ConjugateTranpose = 2, - }; - - /// Enumerant value - kind_t kind; - - /// Default constructor - matrix_transform_t() : kind(Invalid) {} - - /// Copy constructor - matrix_transform_t(const kind_t &other_kind) : kind(other_kind) {} - - /// Cast to kind_t - operator kind_t() const { return kind; } - - /// Returns the instance as a string - __host__ __device__ inline - char const* to_string() const - { - switch (kind) - { - case NonTranspose: return "NonTranspose"; - case Transpose: return "Transpose"; - case ConjugateTranpose: return "ConjugateTranpose"; - default: return "Invalid"; - } - } - - /// Insert the formatted instance into the output stream - void print(std::ostream& out) const { out << to_string(); } - -}; - - -} // namespace cutlass diff --git a/cutlass/util/nv_std.h b/cutlass/util/nv_std.h deleted file mode 100644 index 819df3a038..0000000000 --- a/cutlass/util/nv_std.h +++ /dev/null @@ -1,705 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief C++ features that may be otherwise unimplemented for CUDA device functions. - * - * This file has three components: - * - * (1) Macros: - * - Empty macro defines for C++ keywords not supported by the current - * version of C++. These simply allow compilation to proceed (but do - * not provide the added semantics). - * - \p noexcept - * - \p constexpr - * - \p nullptr - * - \p static_assert - * - * - Macro functions that we need in constant expressions because the - * C++ equivalents require constexpr compiler support. 
These are - * prefixed with \p __NV_STD_* - * - \p __NV_STD_MAX - * - \p __NV_STD_MIN - * - * (2) Re-implementations of STL functions and types: - * - C++ features that need the \p __device__ annotation. These are - * placed into the \p nv_std namespace. - * - \p plus - * - \p less - * - \p greater - * - \p min - * - \p max - * - \p methods on std::pair (==, !=, <, <=, >, >=, and make_pair()) - * - * (3) Stop-gap implementations of unsupported STL functions and types: - * - STL functions and types defined by C++ 11/14/17/etc. that are not - * provided by the current version of C++. These are placed into the - * \p nv_std namespace - * - \p integral_constant - * - \p nullptr_t - * - \p true_type - * - \p false_type - * - \p bool_constant - * - \p enable_if - * - \p conditional - * - \p is_same - * - \p is_base_of - * - \p remove_const - * - \p remove_volatile - * - \p remove_cv - * - \p is_volatile - * - \p is_pointer - * - \p is_void - * - \p is_integral - * - \p is_floating_point - * - \p is_arithmetic - * - \p is_fundamental - * - \p is_trivially_copyable - * - \p alignment_of - * - \p aligned_storage - * - * (4) Functions and types that are STL-like (but aren't in the STL): - * - \p TODO: min and max functors? - * - * The idea is that, as we drop support for older compilers, we can simply #define - * the \p __NV_STD_XYZ macros and \p nv_std namespace to alias their C++ - * counterparts (or trivially find-and-replace their occurrences in code text). 
- */ - - -//----------------------------------------------------------------------------- -// Include STL files that nv_std provides functionality for -//----------------------------------------------------------------------------- - -#include // nullptr_t -#include // Minimum/maximum operations -#include // Arithmetic operations -#include // For methods on std::pair -#if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500)) - #include // For integral constants, conditional metaprogramming, and type traits -#endif - - - -/****************************************************************************** - * Macros - ******************************************************************************/ -//----------------------------------------------------------------------------- -// Keywords -//----------------------------------------------------------------------------- - -/// noexcept, constexpr -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) - #ifndef noexcept - #define noexcept - #endif - #ifndef constexpr - #define constexpr - #endif -#endif - -/// nullptr -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310 )) - #ifndef nullptr - #define nullptr 0 - #endif -#endif - -/// static_assert -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600 )) - #ifndef static_assert - #define __nv_std_cat_(a, b) a ## b - #define __nv_std_cat(a, b) __nv_std_cat_(a, b) - #define static_assert(__e, __m) typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1] - #endif -#endif - - -//----------------------------------------------------------------------------- -// Functions -//----------------------------------------------------------------------------- - -/// Select maximum(a, b) -#ifndef __NV_STD_MAX - #define __NV_STD_MAX(a, b) (((b) > (a)) ? 
(b) : (a)) -#endif - -/// Select minimum(a, b) -#ifndef __NV_STD_MIN - #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a)) -#endif - - - - -/****************************************************************************** - * Re-implementations - ******************************************************************************/ - -namespace nv_std { - - //----------------------------------------------------------------------------- - // Arithmetic operations, comparisons - //----------------------------------------------------------------------------- - - /// nv_std::plus - template - struct plus - { - inline __host__ __device__ - constexpr T operator()(const T &lhs, const T &rhs) const - { - return lhs + rhs; - } - }; - - - /// std::less - template - struct less - { - inline __host__ __device__ - constexpr bool operator()(const T &lhs, const T &rhs) const - { - return lhs < rhs; - } - }; - - /// std::greater - template - struct greater - { - inline __host__ __device__ - constexpr bool operator()(const T &lhs, const T &rhs) const - { - return lhs > rhs; - } - }; - - - //----------------------------------------------------------------------------- - // Minimum/maximum operations - //----------------------------------------------------------------------------- - - /// std::min - template - inline __host__ __device__ - constexpr const T& min( - const T& a, - const T& b) - { - return (b < a) ? b : a; - } - - /// std::max - template - inline __host__ __device__ - constexpr const T& max( - const T& a, - const T& b) - { - return (a < b) ? 
b : a; - } - - - //----------------------------------------------------------------------------- - // Methods on std::pair - //----------------------------------------------------------------------------- - - using std::pair; - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator==( const pair& lhs, const pair& rhs ) - { - return (lhs.first == rhs.first) && (lhs.second == rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator!=( const pair& lhs, const pair& rhs ) - { - return (lhs.first != rhs.first) && (lhs.second != rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator<( const pair& lhs, const pair& rhs ) - { - return (lhs.first < rhs.first) ? - true : - (rhs.first < lhs.first) ? - false : - (lhs.second < rhs.second); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator<=( const pair& lhs, const pair& rhs ) - { - return !(rhs < lhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator>( const pair& lhs, const pair& rhs ) - { - return (rhs < lhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - constexpr bool operator>=( const pair& lhs, const pair& rhs ) - { - return !(lhs < rhs); - } - - template< class T1, class T2 > - inline __host__ __device__ - std::pair make_pair( T1 t, T2 u ) - { - std::pair retval; - retval.first = t; - retval.second = u; - return retval; - } - -} // namespace nv_std - - - -/****************************************************************************** - * Implementations of C++ 11/14/17/... 
STL features - ******************************************************************************/ - -namespace nv_std { - -//----------------------------------------------------------------------------- -// Integral constant helper types -//----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::integral_constant - template - struct integral_constant; - - /// std::integral_constant - template - struct integral_constant - { - static const value_t value = V; - - typedef value_t value_type; - typedef integral_constant type; - - inline __host__ __device__ operator value_type() const - { - return value; - } - - inline __host__ __device__ const value_type operator()() const - { - return value; - } - }; - - -#else - - using std::integral_constant; - using std::pair; - -#endif - - /// The type used as a compile-time boolean with true value. - typedef integral_constant true_type; - - /// The type used as a compile-time boolean with false value. 
- typedef integral_constant false_type; - - -#if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) - - /// std::bool_constant - template - struct bool_constant : nv_std::integral_constant - {}; - -#else - - using std::bool_constant; - -#endif - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700)) - - /// std::nullptr_t - struct nullptr_t {}; - -#else - - using std::nullptr_t; - -#endif - - - - //----------------------------------------------------------------------------- - // Conditional metaprogramming - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) - - /// std::enable_if (true specialization) - template - struct enable_if { - typedef T type; - }; - - /// std::enable_if (false specialization) - template - struct enable_if { }; - - - /// std::conditional (true specialization) - template - struct conditional { typedef T type; }; - - /// std::conditional (false specialization) - template - struct conditional { typedef F type; }; - -#else - - using std::enable_if; - using std::conditional; - -#endif - - - - //----------------------------------------------------------------------------- - // Const/volatility specifiers - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::remove_const (non-const specialization) - template struct remove_const { typedef T type; }; - - /// std::remove_const (const specialization) - template struct remove_const { typedef T type; }; - - - - /// std::remove_volatile (non-volatile specialization) - template struct remove_volatile { typedef T type; }; - - /// std::remove_volatile (volatile specialization) - template struct remove_volatile { typedef T type; }; - - - - /// std::remove_cv - 
template - struct remove_cv { - typedef typename remove_volatile::type>::type type; - }; - -#else - - using std::remove_const; - using std::remove_volatile; - using std::remove_cv; - -#endif - - - //----------------------------------------------------------------------------- - // Type relationships - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::is_same (false specialization) - template - struct is_same : false_type - {}; - - /// std::is_same (true specialization) - template - struct is_same : true_type - {}; - - - /// Helper for std::is_base_of - template - struct is_base_of_helper - { - typedef char (&yes)[1]; - typedef char (&no)[2]; - - template - struct dummy - { - operator B*() const; - operator D*(); - }; - - template - static yes check(DerivedT*, T); - - static no check(BaseT*, int); - - static const bool value = sizeof(check(dummy(), int())) == sizeof(yes); - }; - - /// std::is_base_of - template - struct is_base_of : integral_constant< - bool, - (is_base_of_helper::type, typename remove_cv::type>::value) || - (is_same::type, typename remove_cv::type>::value)> - {}; - - -#else - - using std::is_same; - using std::is_base_of; - -#endif - - - - //----------------------------------------------------------------------------- - // Type properties - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - /// std::is_volatile - template struct is_volatile : false_type {}; - template struct is_volatile : true_type {}; - - - /// Helper for std::is_pointer (false specialization) - template struct is_pointer_helper : false_type {}; - - /// Helper for std::is_pointer (true specialization) - template struct is_pointer_helper : true_type {}; - - /// std::is_pointer - template struct is_pointer : 
is_pointer_helper::type> {}; - - - - /// std::is_void - template - struct is_void : is_same::type> - {}; - - - - /// std::is_integral - template struct is_integral : false_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template <> struct is_integral : true_type {}; - template struct is_integral : is_integral {}; - template struct is_integral : is_integral {}; - template struct is_integral : is_integral {}; - - - - /// std::is_floating_point - template - struct is_floating_point : integral_constant< - bool, - (is_same::type>::value || - is_same::type>::value)> - {}; - - - - /// std::is_arithmetic - template - struct is_arithmetic : - integral_constant::value || is_floating_point::value)> - {}; - - - /// std::is_fundamental - template - struct is_fundamental : integral_constant< - bool, (is_arithmetic::value || - is_void::value || - is_same::type>::value)> - {}; - - - - -#else - - using std::is_volatile; - using std::is_pointer; - using std::is_void; - using std::is_integral; - using std::is_floating_point; - using std::is_arithmetic; - using std::is_fundamental; - -#endif - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || \ - (defined(_MSC_VER) && (_MSC_VER < 1800)) || \ - (defined(__GNUG__) && (__GNUC__ < 5)) - - /** - * std::is_trivially_copyable - * - * This implementation only evaluates true if T is fundamental or pointer - * - * Without help from partial template specializations provided by the user for - * a specific class or struct, this trait will never report that the specified - * class or struct is 
trivially-copyable ; this is always safe, - * if possibly sub-optimal. - */ - template - struct is_trivially_copyable : - integral_constant::value || is_pointer::value)> - {}; - -#else - - using std::is_trivially_copyable; - -#endif - - - - - //----------------------------------------------------------------------------- - // Alignment and layout utilities - //----------------------------------------------------------------------------- - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) - - - /// std::alignment_of - template - struct alignment_of - { - struct pad - { - value_t val; - char byte; - }; - - enum - { - value = sizeof(pad) - sizeof(value_t) - }; - }; - -#else - - template - struct alignment_of : std::alignment_of {}; - -#endif - - /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */ - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - template <> struct alignment_of { enum { value = 16 }; }; - - // Specializations for volatile/const qualified types - template struct alignment_of : alignment_of {}; - template struct alignment_of : alignment_of {}; - template struct alignment_of : alignment_of {}; - - - -#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) - - template struct aligned_chunk; - template<> struct __align__(1) aligned_chunk<1> { uint8_t buff; }; - template<> struct __align__(2) 
aligned_chunk<2> { uint16_t buff; }; - template<> struct __align__(4) aligned_chunk<4> { uint32_t buff; }; - template<> struct __align__(8) aligned_chunk<8> { uint32_t buff[2]; }; - template<> struct __align__(16) aligned_chunk<16> { uint32_t buff[4]; }; - template<> struct __align__(32) aligned_chunk<32> { uint32_t buff[8]; }; - template<> struct __align__(64) aligned_chunk<64> { uint32_t buff[16]; }; - template<> struct __align__(128) aligned_chunk<128> { uint32_t buff[32]; }; - template<> struct __align__(256) aligned_chunk<256> { uint32_t buff[64]; }; - template<> struct __align__(512) aligned_chunk<512> { uint32_t buff[128]; }; - template<> struct __align__(1024) aligned_chunk<1024> { uint32_t buff[256]; }; - template<> struct __align__(2048) aligned_chunk<2048> { uint32_t buff[512]; }; - template<> struct __align__(4096) aligned_chunk<4096> { uint32_t buff[1024]; }; - - /// std::aligned_storage - template - struct aligned_storage - { - typedef aligned_chunk type[Len / sizeof(aligned_chunk)]; - }; - -#else - - using std::aligned_storage; - -#endif - - - - -}; // namespace nv_std - diff --git a/cutlass/util/platform.h b/cutlass/util/platform.h new file mode 100644 index 0000000000..32c41a67a0 --- /dev/null +++ b/cutlass/util/platform.h @@ -0,0 +1,801 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#pragma once + +/** + * \file + * \brief C++ features that may be otherwise unimplemented for CUDA device functions. + * + * This file has four components: + * + * (1) Macros: + * - Empty macro defines for C++ keywords not supported by the current + * version of C++. These simply allow compilation to proceed (but do + * not provide the added semantics). + * - \p noexcept + * - \p constexpr + * - \p nullptr + * - \p static_assert + * + * - Macro functions that we need in constant expressions because the + * C++ equivalents require constexpr compiler support. These are + * prefixed with \p __NV_STD_* + * - \p __NV_STD_MAX + * - \p __NV_STD_MIN + * + * (2) Re-implementations of STL functions and types: + * - C++ features that need the \p __device__ annotation. These are + * placed into the \p platform namespace. 
+ * - \p plus + * - \p less + * - \p greater + * - \p min + * - \p max + * - \p methods on std::pair (==, !=, <, <=, >, >=, and make_pair()) + * + * (3) Stop-gap implementations of unsupported STL functions and types: + * - STL functions and types defined by C++ 11/14/17/etc. that are not + * provided by the current version of C++. These are placed into the + * \p platform namespace + * - \p integral_constant + * - \p nullptr_t + * - \p true_type + * - \p false_type + * - \p bool_constant + * - \p enable_if + * - \p conditional + * - \p is_same + * - \p is_base_of + * - \p remove_const + * - \p remove_volatile + * - \p remove_cv + * - \p is_volatile + * - \p is_pointer + * - \p is_void + * - \p is_integral + * - \p is_floating_point + * - \p is_arithmetic + * - \p is_fundamental + * - \p is_trivially_copyable + * - \p alignment_of + * - \p aligned_storage + * + * (4) Functions and types that are STL-like (but aren't in the STL): + * - \p TODO: min and max functors? + * + * The idea is that, as we drop support for older compilers, we can simply #define + * the \p __NV_STD_XYZ macros and \p platform namespace to alias their C++ + * counterparts (or trivially find-and-replace their occurrences in code text). 
+ */ + +//----------------------------------------------------------------------------- +// Dependencies +//----------------------------------------------------------------------------- + +#include + +#if !defined(__CUDACC_RTC__) +//----------------------------------------------------------------------------- +// Include STL files that platform provides functionality for +//----------------------------------------------------------------------------- + +#include // Minimum/maximum operations +#include // nullptr_t +#include // Arithmetic operations +#include // For methods on std::pair +#if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500)) +#include // For integral constants, conditional metaprogramming, and type traits +#endif + +#include + +#endif +/****************************************************************************** + * Macros + ******************************************************************************/ +//----------------------------------------------------------------------------- +// Keywords +//----------------------------------------------------------------------------- + +/// noexcept, constexpr +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) +#ifndef noexcept +#define noexcept +#endif +#ifndef constexpr +#define constexpr +#endif +#endif + +/// nullptr +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310)) +#ifndef nullptr +#define nullptr 0 +#endif +#endif + +/// static_assert +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) +#ifndef static_assert +#define __platform_cat_(a, b) a##b +#define __platform_cat(a, b) __platform_cat_(a, b) +#define static_assert(__e, __m) typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 
1 : -1] +#endif +#endif + +//----------------------------------------------------------------------------- +// Functions +//----------------------------------------------------------------------------- + +/// Select maximum(a, b) +#ifndef __NV_STD_MAX +#define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a)) +#endif + +/// Select minimum(a, b) +#ifndef __NV_STD_MIN +#define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a)) +#endif + +/****************************************************************************** + * Re-implementations + ******************************************************************************/ +namespace cutlass { +namespace platform { + +//----------------------------------------------------------------------------- +// Arithmetic operations, comparisons +//----------------------------------------------------------------------------- + +/// platform::plus +template +struct plus { + CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; } +}; + +/// std::less +template +struct less { + CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const { + return lhs < rhs; + } +}; + +/// std::greater +template +struct greater { + CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const { + return lhs > rhs; + } +}; + +//----------------------------------------------------------------------------- +// Minimum/maximum operations +//----------------------------------------------------------------------------- + +/// std::min +template +CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) { + return (b < a) ? b : a; +} + +/// std::max +template +CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) { + return (a < b) ? 
b : a; +} + +#if !defined(__CUDACC_RTC__) +//----------------------------------------------------------------------------- +// Methods on std::pair +//----------------------------------------------------------------------------- + +using std::pair; + +template +CUTLASS_HOST_DEVICE constexpr bool operator==(const pair& lhs, const pair& rhs) { + return (lhs.first == rhs.first) && (lhs.second == rhs.second); +} + +/// Pairs are unequal when EITHER member differs, i.e. the exact negation of operator== +template +CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair& lhs, const pair& rhs) { + return (lhs.first != rhs.first) || (lhs.second != rhs.second); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator<(const pair& lhs, const pair& rhs) { + return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false + : (lhs.second < rhs.second); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair& lhs, const pair& rhs) { + return !(rhs < lhs); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator>(const pair& lhs, const pair& rhs) { + return (rhs < lhs); +} + +template +CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair& lhs, const pair& rhs) { + return !(lhs < rhs); +} + +template +CUTLASS_HOST_DEVICE std::pair make_pair(T1 t, T2 u) { + std::pair retval; + retval.first = t; + retval.second = u; + return retval; +} +#endif + +} // namespace platform + +/****************************************************************************** + * Implementations of C++ 11/14/17/... 
STL features + ******************************************************************************/ + +namespace platform { + +//----------------------------------------------------------------------------- +// Integral constant helper types +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::integral_constant +template +struct integral_constant; + +/// std::integral_constant +template +struct integral_constant { + static const value_t value = V; + + typedef value_t value_type; + typedef integral_constant type; + + CUTLASS_HOST_DEVICE operator value_type() const { return value; } + + CUTLASS_HOST_DEVICE const value_type operator()() const { return value; } +}; + +#else + +using std::integral_constant; +using std::pair; + +#endif + +/// The type used as a compile-time boolean with true value. +typedef integral_constant true_type; + +/// The type used as a compile-time boolean with false value. 
+typedef integral_constant false_type; + +#if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900)) + +/// std::bool_constant +template +struct bool_constant : platform::integral_constant {}; + +#else + +using std::bool_constant; + +#endif + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700)) + +/// std::nullptr_t +struct nullptr_t {}; + +#else + +using std::nullptr_t; + +#endif + +//----------------------------------------------------------------------------- +// Conditional metaprogramming +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600)) + +/// std::enable_if (true specialization) +template +struct enable_if { + typedef T type; +}; + +/// std::enable_if (false specialization) +template +struct enable_if {}; + +/// std::conditional (true specialization) +template +struct conditional { + typedef T type; +}; + +/// std::conditional (false specialization) +template +struct conditional { + typedef F type; +}; + +#else + +using std::enable_if; +using std::conditional; + +#endif + +//----------------------------------------------------------------------------- +// Const/volatility specifiers +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::remove_const (non-const specialization) +template +struct remove_const { + typedef T type; +}; + +/// std::remove_const (const specialization) +template +struct remove_const { + typedef T type; +}; + +/// std::remove_volatile (non-volatile specialization) +template +struct remove_volatile { + typedef T type; +}; + +/// std::remove_volatile (volatile specialization) +template +struct remove_volatile { + typedef T type; +}; + +/// std::remove_cv +template +struct remove_cv { + typedef 
typename remove_volatile::type>::type type; +}; + +#else + +using std::remove_const; +using std::remove_volatile; +using std::remove_cv; + +#endif + +//----------------------------------------------------------------------------- +// Type relationships +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::is_same (false specialization) +template +struct is_same : false_type {}; + +/// std::is_same (true specialization) +template +struct is_same : true_type {}; + +/// Helper for std::is_base_of +template +struct is_base_of_helper { + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct dummy { + CUTLASS_HOST_DEVICE operator B*() const; + CUTLASS_HOST_DEVICE operator D*(); + }; + + template + CUTLASS_HOST_DEVICE static yes check(DerivedT*, T); + + CUTLASS_HOST_DEVICE static no check(BaseT*, int); + + static const bool value = sizeof(check(dummy(), int())) == sizeof(yes); +}; + +/// std::is_base_of +template +struct is_base_of + : integral_constant::type, + typename remove_cv::type>::value) || + (is_same::type, + typename remove_cv::type>::value)> {}; + +#else + +using std::is_same; +using std::is_base_of; + +#endif + +//----------------------------------------------------------------------------- +// Type properties +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::is_volatile +template +struct is_volatile : false_type {}; +template +struct is_volatile : true_type {}; + +/// Helper for std::is_pointer (false specialization) +template +struct is_pointer_helper : false_type {}; + +/// Helper for std::is_pointer (true specialization) +template +struct is_pointer_helper : true_type {}; + +/// std::is_pointer +template +struct is_pointer : is_pointer_helper::type> {}; + +/// 
std::is_void +template +struct is_void : is_same::type> {}; + +/// std::is_integral +template +struct is_integral : false_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template <> +struct is_integral : true_type {}; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; +template +struct is_integral : is_integral {}; + +/// std::is_floating_point +template +struct is_floating_point + : integral_constant::type>::value || + is_same::type>::value)> {}; + +/// std::is_arithmetic +template +struct is_arithmetic + : integral_constant::value || is_floating_point::value)> {}; + +/// std::is_fundamental +template +struct is_fundamental + : integral_constant::value || is_void::value || + is_same::type>::value)> {}; + +#else + +using std::is_volatile; +using std::is_pointer; +using std::is_void; +using std::is_integral; +using std::is_floating_point; +using std::is_arithmetic; +using std::is_fundamental; + +#endif + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \ + (defined(__GNUG__) && (__GNUC__ < 5)) + +/** + * std::is_trivially_copyable + * + * This implementation only evaluates true if T is fundamental or pointer + * + * Without help from partial template specializations provided by the user for + * a specific class or struct, this trait will never report that the specified + * class or struct is trivially-copyable ; this is always safe, + * if possibly sub-optimal. 
+ */ +template +struct is_trivially_copyable + : integral_constant::value || is_pointer::value)> {}; + +#else + +using std::is_trivially_copyable; + +#endif + +//----------------------------------------------------------------------------- +// Alignment and layout utilities +//----------------------------------------------------------------------------- + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500)) + +/// std::alignment_of +template +struct alignment_of { + struct pad { + value_t val; + char byte; + }; + + enum { value = sizeof(pad) - sizeof(value_t) }; +}; + +#else + +template +struct alignment_of : std::alignment_of {}; + +#endif + +/* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */ +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; +template <> +struct alignment_of { + enum { value = 16 }; +}; + +// Specializations for volatile/const qualified types +template +struct alignment_of : alignment_of {}; +template +struct alignment_of : alignment_of {}; +template +struct alignment_of : alignment_of {}; + +#if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) + +template +struct aligned_chunk; +template <> +struct __align__(1) aligned_chunk<1> { + uint8_t buff; +}; +template <> +struct __align__(2) aligned_chunk<2> { + uint16_t buff; +}; +template <> +struct 
__align__(4) aligned_chunk<4> { + uint32_t buff; +}; +template <> +struct __align__(8) aligned_chunk<8> { + uint32_t buff[2]; +}; +template <> +struct __align__(16) aligned_chunk<16> { + uint32_t buff[4]; +}; +template <> +struct __align__(32) aligned_chunk<32> { + uint32_t buff[8]; +}; +template <> +struct __align__(64) aligned_chunk<64> { + uint32_t buff[16]; +}; +template <> +struct __align__(128) aligned_chunk<128> { + uint32_t buff[32]; +}; +template <> +struct __align__(256) aligned_chunk<256> { + uint32_t buff[64]; +}; +template <> +struct __align__(512) aligned_chunk<512> { + uint32_t buff[128]; +}; +template <> +struct __align__(1024) aligned_chunk<1024> { + uint32_t buff[256]; +}; +template <> +struct __align__(2048) aligned_chunk<2048> { + uint32_t buff[512]; +}; +template <> +struct __align__(4096) aligned_chunk<4096> { + uint32_t buff[1024]; +}; + +/// std::aligned_storage +template +struct aligned_storage { + typedef aligned_chunk type[Len / sizeof(aligned_chunk)]; +}; + +#else + +using std::aligned_storage; + +#endif + +#if !defined(__CUDACC_RTC__) +/// Default deleter +template +struct default_delete { + void operator()(T* ptr) const { delete ptr; } +}; + +/// Partial specialization for deleting array types +template +struct default_delete { + void operator()(T* ptr) const { delete[] ptr; } +}; + +/// std::unique_ptr +template > +class unique_ptr { + public: + typedef T* pointer; + typedef T element_type; + typedef Deleter deleter_type; + + private: + /// Pointer to memory + pointer _ptr; + + /// Deleter + deleter_type _deleter; + + public: + unique_ptr() : _ptr(nullptr) {} + unique_ptr(pointer p) : _ptr(p) {} + + ~unique_ptr() { + if (_ptr) { + _deleter(_ptr); + } + } + /// Returns a pointer to the managed object or nullptr if no object is owned. 
+ pointer get() const noexcept { return _ptr; } + + /// Releases ownership of the managed object, if any + pointer release() noexcept { + pointer p(_ptr); + _ptr = nullptr; + return p; + } + + /// Replaces the managed object, deleting the old object. + void reset(pointer p = pointer()) noexcept { + pointer old_ptr = _ptr; + _ptr = p; + if (old_ptr != nullptr) { + get_deleter()(old_ptr); + } + } + + /// Swaps the managed objects with *this and another unique_ptr + void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); } + + /// Returns the deleter object + Deleter& get_deleter() noexcept { return _deleter; } + + /// Returns the deleter object + Deleter const& get_deleter() const noexcept { return _deleter; } + + /// Checks whether an object is owned + operator bool() const noexcept { return _ptr != nullptr; } + + /// Dereferences the unique_ptr + T& operator*() const { return *_ptr; } + + /// Returns a pointer to the managed object + pointer operator->() const noexcept { return _ptr; } + + /// Array access to managed object + T& operator[](size_t i) const { return _ptr[i]; } +}; + +/// Specializes the swap algorithm +template +void swap(unique_ptr& lhs, unique_ptr& rhs) noexcept { + lhs.swap(rhs); +} +#endif + +}; // namespace platform +}; // namespace cutlass diff --git a/cutlass/util/printable.h b/cutlass/util/printable.h deleted file mode 100644 index dd7bda408c..0000000000 --- a/cutlass/util/printable.h +++ /dev/null @@ -1,72 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Pure virtual base class for printable types - */ - -#include - - -namespace cutlass { - - -/****************************************************************************** - * printable_t - ******************************************************************************/ - -/** - * Pure virtual base class for printable types - */ -struct printable_t -{ - /// Returns the instance as a string - __host__ __device__ inline - virtual char const* to_string() const = 0; - - /// Insert the formatted instance into the output stream - virtual void print(std::ostream& out) const = 0; - - /// Destructor - virtual ~printable_t() {} -}; - - -/// Insert the formatted \p printable into the output stream -std::ostream& operator<<( - std::ostream& out, - printable_t const& printable) -{ - printable.print(out); - return out; -} - - -} // namespace cutlass diff --git a/cutlass/util/util.h b/cutlass/util/util.h deleted file mode 100644 index e4247ccd76..0000000000 --- a/cutlass/util/util.h +++ /dev/null @@ -1,82 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Umbrella header file for utilities - */ - -#include "debug.h" -#include "device_introspection.h" -#include "io_intrinsics.h" -#include "math.h" -#include "nv_std.h" -#include "printable.h" -#include "matrix_transform.h" - - - -namespace cutlass { - - -/****************************************************************************** - * int_constant - ******************************************************************************/ - -/** - * Shorthand for nv_std::integral_constant of int32_t type - */ -template -struct int_constant : nv_std::integral_constant -{}; - - -/****************************************************************************** - * Uninitialized - ******************************************************************************/ - -/** - * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions - */ -template -struct __align__(16) uninitialized -{ - /// Backing storage - uint8_t storage[sizeof(T)]; - - /// Alias - __host__ __device__ __forceinline__ T& alias() - { - return reinterpret_cast(*this); - } -}; - - 
- -} // namespace cutlass diff --git a/cutlass/vector.h b/cutlass/vector.h new file mode 100644 index 0000000000..a66dfdef7c --- /dev/null +++ b/cutlass/vector.h @@ -0,0 +1,229 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*!
\file + \brief Defines a 1D vector of elements held in the registers of each thread. +*/ +#pragma once + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) +#include +#endif + +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct AlignedStruct {}; + +template <> +struct __align__(1) AlignedStruct<1>{}; +template <> +struct __align__(2) AlignedStruct<2>{}; +template <> +struct __align__(4) AlignedStruct<4>{}; +template <> +struct __align__(8) AlignedStruct<8>{}; +template <> +struct __align__(16) AlignedStruct<16>{}; +template <> +struct __align__(32) AlignedStruct<32>{}; +template <> +struct __align__(64) AlignedStruct<64>{}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +union Vector { + /// The scalar type. + typedef Scalar_ Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes * (int)sizeof(Scalar) }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + // Make sure that the vector type makes sense. + static_assert(kVectorSize <= 16, "Vector type is too large"); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The associated array of scalars. + Scalar scalars[kLanes]; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; } + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16) + +template +union Vector { + /// The scalar type. 
+ typedef half Scalar; + + /// The number of elements in the vector. + enum { kLanes = kLanes_ }; + /// The size of the vector. + enum { kVectorSize = kLanes * (int)sizeof(Scalar) }; + /// The number of registers needed to store the vector. + enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 }; + + // Make sure that the vector type makes sense. + static_assert(kVectorSize <= size_t(16), "Vector type is too large"); + + /// The aligned storage to make sure we have good alignment. + AlignedStruct aligned_; + /// The associated array of scalars. + uint16_t scalars[kLanes]; + /// The data in registers. + uint32_t registers[kRegisters]; + + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { + return reinterpret_cast(scalars[i]); + } + /// Accessor to the ith lane. + CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast(scalars[i]); } +}; + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE void make_zero(Scalar_& x) { + x = Scalar_(0); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize { + typedef Vector Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct Vectorize { + typedef Element_ Type; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +template +CUTLASS_DEVICE void make_zero(Vector& vec) { + for (int i = 0; i < Vector::kRegisters; ++i) { + vec.registers[i] = 0; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// +// cutlass::Extent similar to std::extent but applicable to CUTLASS types +// + +/// Returns the extent of a scalar or vector +template +struct Extent { + static size_t const kValue = 1; +}; + +/// Returns the number 
of lanes of a vector if need be +template +struct Extent > { + static size_t const kValue = Lanes; +}; + +/// Returns the number of lanes of a vector if need be +template +struct Extent const> { + static size_t const kValue = Lanes; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Traits describing properties of vectors and scalar-as-vectors +template +struct VectorTraits { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = 1; + + /// True if the type is actually a cutlass::Vector, otherwise false + static bool const IsVector = false; + + /// Type that is always a vector + typedef Vector Vector; +}; + +/// Partial specialization for actual cutlass::Vector +template +struct VectorTraits > { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = Lanes; + + /// Type is actually a cutlass::Vector + static bool const IsVector = true; + + /// Type that is always a Vector + typedef Vector Vector; +}; + +/// Partial specialization for actual cutlass::Vector +template +struct VectorTraits const> { + /// Scalar type + typedef T Scalar; + + /// Number of lanes of vector + static int const kLanes = Lanes; + + /// Type is actually a cutlass::Vector + static bool const IsVector = true; + + /// Type that is always a Vector + typedef Vector Vector; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass diff --git a/cutlass/wmma_matrix.h b/cutlass/wmma_matrix.h new file mode 100644 index 0000000000..c4d8a0b54b --- /dev/null +++ b/cutlass/wmma_matrix.h @@ -0,0 +1,193 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! \file + \brief Abstractions for loading and storing matrices using the CUDA WMMA API. +*/ +#pragma once + +#if defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700) + +// Dependent header files should use the following macro to guard all code using +// nvcuda::wmma:: to enable compilation for CUDA Compute Capabilities < sm_70. +// Earlier shader models do not support Tensor Cores.
+#define CUTLASS_USE_WMMA_API + +#include "stdio.h" + +#include +#include +#include +#include +#include +#include + +namespace cutlass { + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Statically maps cutlass::MatrixLayout => nvcuda::wmma layout tags +template +struct WmmaLayout { + typedef nvcuda::wmma::col_major Layout; +}; + +/// Statically maps cutlass::MatrixLayout => nvcuda::wmma layout tags +template <> +struct WmmaLayout { + typedef nvcuda::wmma::row_major Layout; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment load and store operations +template +struct WmmaMatrix {}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for A operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. + nvcuda::wmma::matrix_a, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_, + /// The layout. + typename WmmaLayout::Layout> { + /// This type. + typedef WmmaMatrix This_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + nvcuda::wmma::load_matrix_sync(*this, pointer, stride); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + nvcuda::wmma::store_matrix_sync(pointer, *this, stride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for B operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. 
+ nvcuda::wmma::matrix_b, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_, + /// The layout. + typename WmmaLayout::Layout> { + /// This type. + typedef WmmaMatrix This_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + nvcuda::wmma::load_matrix_sync(*this, pointer, stride); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + nvcuda::wmma::store_matrix_sync(pointer, *this, stride); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Adapter to nvcuda::wmma fragment accessors for C operand +template +struct WmmaMatrix + : public nvcuda::wmma::fragment< + /// The nvcuda::wmma operand name. + nvcuda::wmma::accumulator, + /// The dimensions. + WmmaShape_::kW, + WmmaShape_::kH, + WmmaShape_::kD, + /// The scalar. + Scalar_> { + /// This type. + typedef WmmaMatrix This_; + /// The layout. + static MatrixLayout::Kind const kLayout = kLayout_; + + /// Fill-in the element. + CUTLASS_DEVICE This_& operator=(Scalar_ const& x) { + nvcuda::wmma::fill_fragment(*this, x); + return *this; + } + + /// Load from memory. + CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) { + bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor; + nvcuda::wmma::load_matrix_sync( + *this, + pointer, + stride, + kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major); + } + + /// Store to memory. + CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const { + bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor; + nvcuda::wmma::store_matrix_sync( + pointer, + *this, + stride, + kIsRowMajor ? 
nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace cutlass + +#endif // defined CUTLASS_USE_WMMA_API diff --git a/cutlass_test/.gitignore b/cutlass_test/.gitignore deleted file mode 100644 index 5628abb92c..0000000000 --- a/cutlass_test/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -/bin/ -/gemm-GPU.csv -/gemm-REF.csv -/a.csv -/b.csv -/gp100_schmoo/ -/ignore/ diff --git a/cutlass_test/Makefile b/cutlass_test/Makefile deleted file mode 100644 index 8b4b87ee42..0000000000 --- a/cutlass_test/Makefile +++ /dev/null @@ -1,180 +0,0 @@ -#/****************************************************************************** -# * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. -# * -# * Redistribution and use in source and binary forms, with or without -# * modification, are permitted provided that the following conditions are met: -# * * Redistributions of source code must retain the above copyright -# * notice, this list of conditions and the following disclaimer. -# * * Redistributions in binary form must reproduce the above copyright -# * notice, this list of conditions and the following disclaimer in the -# * documentation and/or other materials provided with the distribution. -# * * Neither the name of the NVIDIA CORPORATION nor the -# * names of its contributors may be used to endorse or promote products -# * derived from this software without specific prior written permission. -# * -# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY -# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# * -# ******************************************************************************/ - - -#------------------------------------------------------------------------------- -# -# Makefile usage -# -# make sm= [transpose=] [verbose=<0*|1>] [keep=<0*|1>] -# -# * : default -# -#------------------------------------------------------------------------------- - -TEST_DIR := $(dir $(lastword $(MAKEFILE_LIST))) - -include ../common.mk - - -#------------------------------------------------------------------------------- -# Commandline Options -#------------------------------------------------------------------------------- - -ifdef transpose - TRANSPOSE := $(transpose) -else - TRANSPOSE := nn -endif - -# If defined, GEMMs only compiled with specified alignment restrictions on A and B -# matrices. Otherwise, kernels are compiled for all feasible alignment options, and -# the appropriate kernel is selected. -ifdef alignment - DEFINES += -DGEMM_ALIGNMENT=$(alignment) -endif - -# If defined as false, ragged handling can be disabled. 
-ifdef ragged - DEFINES += -DGEMM_RAGGED=$(ragged) -endif - -#------------------------------------------------------------------------------- -# Include and Library paths -#------------------------------------------------------------------------------- - -INC += -I$(TEST_DIR) -INC += -I$(BASE_DIR) - -LIBS += -lcublas - -#------------------------------------------------------------------------------- -# Preprocessor definitions -#------------------------------------------------------------------------------- - -ifeq (nt, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_B -else ifeq (tn, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_A - -else ifeq (tt, $(TRANSPOSE)) - DEFINES += -DTRANSPOSE_A - DEFINES += -DTRANSPOSE_B -endif - -NVCCFLAGS += -std=c++11 - - -#------------------------------------------------------------------------------- -# Dependency Lists -#------------------------------------------------------------------------------- - -DEPS := $(call rwildcard, $(BASE_DIR),*.h) \ - $(call rwildcard, $(BASE_DIR)cgl,*.h) \ - $(BASE_DIR)common.mk \ - $(TEST_DIR)Makefile - - -ALL := sgemm \ - dgemm \ - hgemm \ - igemm - - -#------------------------------------------------------------------------------- -# make default -#------------------------------------------------------------------------------- - -default: - - -#------------------------------------------------------------------------------- -# make clean -#------------------------------------------------------------------------------- - -clean : - rm -f bin/* - rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o *.obj* *dlink.* *.res *.fatbin *.module_id - - -#------------------------------------------------------------------------------- -# make all -#------------------------------------------------------------------------------- - -all : $(ALL) - - -#------------------------------------------------------------------------------- -# make sgemm 
-#------------------------------------------------------------------------------- - -sgemm: bin/sgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/sgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_SGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make dgemm -#------------------------------------------------------------------------------- - -dgemm: bin/dgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/dgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_DGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make hgemm -#------------------------------------------------------------------------------- - -hgemm: bin/hgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/hgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_HGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make igemm -#------------------------------------------------------------------------------- - -igemm: bin/igemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/igemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_IGEMM $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - -#------------------------------------------------------------------------------- -# make wgemm -#------------------------------------------------------------------------------- - -wgemm: bin/wgemm_$(TRANSPOSE)_$(BIN_SUFFIX) - -bin/wgemm_$(TRANSPOSE)_$(BIN_SUFFIX) : gemm.cu $(DEPS) - mkdir -p bin - $(NVCC) -DTEST_WGEMM -DWMMA $(DEFINES) $(SM_TARGETS) -o $@ gemm.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBINC) $(LIBS) - diff --git 
a/cutlass_test/cublas_dispatch.h b/cutlass_test/cublas_dispatch.h deleted file mode 100644 index 9b21926e88..0000000000 --- a/cutlass_test/cublas_dispatch.h +++ /dev/null @@ -1,300 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * C++ interface for dispatching CUBLAS GEMM calls - */ - -#include - -namespace cutlass { - - -/****************************************************************************** - * cuBLAS dispatch entrypoints - ******************************************************************************/ - -/** - * Dispatch cuBLAS igemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - int32_t alpha, ///< Scalar used for multiplicands - int8_t *d_a, ///< Device pointer to matrix A array values - int8_t *d_b, ///< Device pointer to matrix B array values - int32_t beta, ///< Scalar used for addend - int32_t *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasGemmEx( - cublas_handle, - transform_a, - transform_b, - m, - n, - k, - (void*) &alpha, - (void*) d_a, - CUDA_R_8I, - (transform_a == CUBLAS_OP_N) ? m : k, - (void*) d_b, - CUDA_R_8I, - (transform_b == CUBLAS_OP_N) ? k : n, - (void*) &beta, - (void*) d_c, - CUDA_R_32I, - m, - CUDA_R_32I, - CUBLAS_GEMM_DFALT); -} - - -/** - * Dispatch cuBLAS hgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) 
transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - __half alpha, ///< Scalar used for multiplicands - __half *d_a, ///< Device pointer to matrix A array values - __half *d_b, ///< Device pointer to matrix B array values - __half beta, ///< Scalar used for addend - __half *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasHgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, - (transform_a == CUBLAS_OP_N) ? m : k, - d_b, - (transform_b == CUBLAS_OP_N) ? k : n, - &beta, - d_c, - m); - -} - - -/** - * Dispatch cuBLAS sgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - float alpha, ///< Scalar used for multiplicands - float *d_a, ///< Device pointer to matrix A array values - float *d_b, ///< Device pointer to matrix B array values - float beta, ///< Scalar used for addend - float *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasSgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, - (transform_a == CUBLAS_OP_N) ? m : k, - d_b, - (transform_b == CUBLAS_OP_N) ? 
k : n, - &beta, - d_c, - m); -} - - -/** - * Dispatch cuBLAS dgemm - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. - int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - double alpha, ///< Scalar used for multiplicands - double *d_a, ///< Device pointer to matrix A array values - double *d_b, ///< Device pointer to matrix B array values - double beta, ///< Scalar used for addend - double *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasDgemm( - cublas_handle, transform_a, transform_b, - m, n, k, - &alpha, - d_a, (transform_a == CUBLAS_OP_N) ? m : k, - d_b, (transform_b == CUBLAS_OP_N) ? k : n, - &beta, - d_c, m); -} - -/** - * Dispatch cuBLAS Tensor Cores GEMM - */ -cublasStatus_t cublas_gemm_dispatch( - cublasHandle_t cublas_handle, ///< CUBLAS handle - cublasOperation_t transform_a, ///< Transform op(A) that is non- or (conj.) transpose. - cublasOperation_t transform_b, ///< Transform op(B) that is non- or (conj.) transpose. 
- int m, ///< Height in rows of op(A) and C - int n, ///< Width in columns of op(B) and C - int k, ///< Width in columns of op(A) and height in rows of op(B) - float alpha, ///< Scalar used for multiplicands - half *d_a, ///< Device pointer to matrix A array values - half *d_b, ///< Device pointer to matrix B array values - float beta, ///< Scalar used for addend - float *d_c, ///< Device pointer to matrix C array values - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. -{ - return cublasGemmEx( - cublas_handle, - transform_a, - transform_b, - m, - n, - k, - (void*) &alpha, - (void*) d_a, - CUDA_R_16F, - (transform_a == CUBLAS_OP_N) ? m : k, - (void*) d_b, - CUDA_R_16F, - (transform_b == CUBLAS_OP_N) ? k : n, - (void*) &beta, - (void*) d_c, - CUDA_R_32F, - m, - CUDA_R_32F, - CUBLAS_GEMM_DFALT_TENSOR_OP); -} - - -/** - * Uses cuBLAS to compute gemm on device matrices (unspecialized) - */ -template < - gemm::tiling_strategy::kind_t _TilingStrategy, ///< Tile-sizing classification category - math_operation_class_t _math_op, - matrix_transform_t::kind_t _TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t _TransformB, ///< Transformation op for matrix B - typename _value, ///< Multiplicand value type (matrices A and B) - typename _accum ///< Accumulator value type (matrix C and scalars) -> -struct cublas_gemm -{ - // - // Type alias definitions - // - - static const gemm::tiling_strategy::kind_t TilingStrategy = _TilingStrategy; - static const math_operation_class_t math_op = _math_op; - static const matrix_transform_t::kind_t TransformA = _TransformA; - static const matrix_transform_t::kind_t TransformB = _TransformB; - - using value_t = _value; - using accum_t = _accum; - - /// Launches a GEMM - gemm::launch_configuration operator()( - cublasHandle_t cublas_handle, ///< CUBLAS 
handle - int m, - int n, - int k, - value_t *A, ///< A matrix - value_t *B, ///< B matrix - accum_t *C, ///< C matrix - accum_t alpha, ///< Scalar used for multiplicands - accum_t beta, ///< Scalar used for addend - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. Default is stream0. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream after every kernel launch to check for errors. - { - cublasStatus_t cublas_error = cublas_gemm_dispatch( - cublas_handle, - (cublasOperation_t) TransformA, - (cublasOperation_t) TransformB, - m, - n, - k, - alpha, - A, - B, - beta, - C, - stream, - debug_synchronous); - - cudaError_t error; - if (cublas_error != CUBLAS_STATUS_SUCCESS) - { - if (cublas_error == CUBLAS_STATUS_NOT_SUPPORTED) { - return gemm::launch_configuration(cudaErrorInvalidValue); - } - - error = cudaGetLastError(); - if (error == cudaSuccess) { - return gemm::launch_configuration(cudaErrorUnknown); - } - return error; - } - - // Check for failure to launch - if (CUDA_PERROR_DEBUG(error = cudaPeekAtLastError())) - return gemm::launch_configuration(error); - - // Sync the stream if specified to flush runtime errors - if (debug_synchronous && (CUDA_PERROR_DEBUG(error = cudaStreamSynchronize(stream)))) - return gemm::launch_configuration(error); - - return gemm::launch_configuration(error); - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/cutlass_dispatch.h b/cutlass_test/cutlass_dispatch.h deleted file mode 100644 index 43bd7e673b..0000000000 --- a/cutlass_test/cutlass_dispatch.h +++ /dev/null @@ -1,261 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file Dispatch routines for CUTLASS GEMM kernels - */ - -// CUDA includes -#include - -// Cutlass GEMM API -#include -#include -#include - -// Test utilities -#include "util/type_conversion.h" - -namespace cutlass { - - - -/****************************************************************************** - * Cutlass dispatch entrypoints - ******************************************************************************/ - -// -// Compile-time overrides for alignment and ragged handling. -// - -// If zero, all feasible alignment options are supported. -#ifndef GEMM_ALIGNMENT -#define GEMM_ALIGNMENT 0 -#endif - -// If true, kernels are compiled with ragged handling enabled. -#ifndef GEMM_RAGGED - #define GEMM_RAGGED true -#endif - -// -// Dispatch logic given problem size specialization, math operation class, layout -// and type of operands, and epilogue operation. -// - -/** - * Cutlass GEMM dispatch - */ -template < - gemm::tiling_strategy::kind_t _TilingStrategy, ///< Tile-sizing classification category - math_operation_class_t _math_op, // Indicates - matrix_transform_t::kind_t _TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t _TransformB, ///< Transformation op for matrix B - typename _value, ///< Multiplicand value type (matrices A and B) - typename _accum, ///< Accumulator value type (matrix C and scalars) - typename _epilogue_op_t ///< Epilogue opeartion to update matrix C - = gemm::blas_scaled_epilogue<_accum, _accum, _accum> -> -struct cutlass_gemm_dispatch -{ - // - // Type alias definitions - // - - static const gemm::tiling_strategy::kind_t TilingStrategy = _TilingStrategy; - static const math_operation_class_t math_op = _math_op; - static const matrix_transform_t::kind_t TransformA = _TransformA; - static const matrix_transform_t::kind_t TransformB = _TransformB; - - using value_t = _value; - using accum_t = _accum; - 
using epilogue_op_t = _epilogue_op_t; - - // - // Methods - // - - /// Returns leading dimension for A matrix operand - int leading_dim_a(int m, int k) const - { - return (TransformA == matrix_transform_t::NonTranspose ? m : k); - } - - /// Returns leading dimension for B matrix operand - int leading_dim_b(int k, int n) const - { - return (TransformB == matrix_transform_t::NonTranspose ? k : n); - } - - /// Launches a GEMM - template - gemm::launch_configuration launch( - int m, - int n, - int k, - epilogue_op_t epilogue_op, - value_t *A, - value_t *B, - accum_t *C, - cudaStream_t stream = 0, - bool debug_synchronous = false) - { - return gemm::device_gemm< - TilingStrategy, - math_op, - TransformA, - operand_alignment, - TransformB, - operand_alignment, - value_t, - accum_t, - epilogue_op_t, - accumulator_alignment> - ( - m, - n, - k, - epilogue_op, - A, - B, - C, - stream, - debug_synchronous); - } - - /// Dispatches a CUTLASS GEMM - gemm::launch_configuration operator()( - cublasHandle_t handle, ///< CUBLAS handle - int m, ///< Rows of GEMM problem - int n, ///< Columns of GEMM problem - int k, ///< Inner dimension of GEMM problem - value_t *A, ///< A matrix - value_t *B, ///< B matrix - accum_t *C, ///< C matrix - accum_t alpha, ///< Scalar used for multiplicands - accum_t beta, ///< Scalar used for addend - cudaStream_t stream = 0, ///< CUDA stream to launch kernels within. - bool debug_synchronous = false) ///< Whether or not to synchronize the stream - /// after every kernel launch to check for errors. 
- { - - // Forces kernel selection to choose specific alignment (in bytes) - int const force_operand_alignment = GEMM_ALIGNMENT; - - // Problem size must be multiple of the smallest vector load size - typedef value_t operand_load_t; - int const accumulator_alignment = sizeof(accum_t); - - int const lda = leading_dim_a(m, k); - int const ldb = leading_dim_b(k, n); - - epilogue_op_t epilogue(alpha, beta); - - // TODO: opportunity for metaprogramming loop - - // Prefer the largest granularity of vector load that is compatible with - // problem size and data alignment. - if ((!force_operand_alignment || force_operand_alignment == 16) && - !((sizeof(operand_load_t) * lda) % 16) && - !((sizeof(operand_load_t) * ldb) % 16)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 16) - return launch<__NV_STD_MAX(16, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 8) && - !((sizeof(operand_load_t) * lda) % 8) && - !((sizeof(operand_load_t) * ldb) % 8)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 8) - return launch<__NV_STD_MAX(8, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 4) && - !((sizeof(operand_load_t) * lda) % 4) && - !((sizeof(operand_load_t) * ldb) % 4)) - { - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 4) - return launch<__NV_STD_MAX(4, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - } - else if ((!force_operand_alignment || force_operand_alignment == 2) && - !((sizeof(operand_load_t) * lda) % 2) && - !((sizeof(operand_load_t) * ldb) % 2)) - { - // 16-bit alignment only supported for HGEMM - #if defined(TEST_HGEMM) || defined(TEST_WGEMM) - #if !(GEMM_ALIGNMENT) || (GEMM_ALIGNMENT == 2) - return 
launch<__NV_STD_MAX(2, sizeof(value_t)), accumulator_alignment>( - m, - n, - k, - epilogue, - A, - B, - C, - stream, - debug_synchronous); - #endif - #endif - } - - return gemm::launch_configuration(cudaErrorInvalidValue); - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/gemm.cu b/cutlass_test/gemm.cu deleted file mode 100644 index bdf296041d..0000000000 --- a/cutlass_test/gemm.cu +++ /dev/null @@ -1,572 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/** - * \file gemm.cu - * GEMM test driver - * - */ - -#include -#include -#include -#include - -// CUBLAS GEMM API -#include - -// Set Cutlass debug macro to enable console printing of library errors -#define DEBUG - -#if defined(WMMA) -// Conditionally include WMMA headers (CUDA 9 Preview Feature) -#include -#endif - -// Cutlass GEMM API -#include -#include -#include - -// Test utilities -#include "util/command_line.h" -#include "util/half.h" -#include "util/matrix.h" -#include "util/timer.h" -#include "util/type_conversion.h" - -// Dispatch routines to CUBLAS and CUTLASS -#include "cublas_dispatch.h" -#include "cutlass_dispatch.h" - -/****************************************************************************** - * Globals, constants and typedefs - ******************************************************************************/ - -using namespace cutlass; - -/// CUBLAS handle -cublasHandle_t g_cublas_handle; - -/// The device-id of the current device -int g_device_id = -1; - -/// The number of timing iterations to invoke -int g_timing_iterations = -1; - -/// The number of randomly-sized problems to schmoo -int g_schmoo = 0; - - -/****************************************************************************** - * Number generation - ******************************************************************************/ - -/** - * Simple low-integer generator - */ -struct 
simple_gen -{ - std::default_random_engine generator; - std::uniform_int_distribution distribution; - - /// Constructor - simple_gen(int max) : distribution(max * -1, max) - {} - - /// Functor - int operator()() - { - return distribution(generator); - } -}; - - - - -/****************************************************************************** - * Test execution - ******************************************************************************/ - - -/** - * Compute C = (alpha * A * B) + (beta * C) - */ -template < - typename test_func_t, ///< Test function type - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t> ///< Accumulator value type (matrix C and scalars) -bool test( - int m, ///< Height of C in rows - int n, ///< Width of C in columns - int k, ///< Width (height) of A (B) - accum_t alpha, ///< Multiplicand scalar - accum_t beta) ///< Addend scalar -{ - cudaStream_t stream = 0; - - // - // Initialize matrices - // - - matrix A( - (TransformA == matrix_transform_t::NonTranspose) ? m : k, - (TransformA == matrix_transform_t::NonTranspose) ? k : m); - - matrix B( - (TransformB == matrix_transform_t::NonTranspose) ? k : n, - (TransformB == matrix_transform_t::NonTranspose) ? 
n : k); - - matrix C(m, n); - - // initialized matrices with small values precisely representable as integers - simple_gen a_gen(3); - simple_gen b_gen(5); - A.fill_random(a_gen); - B.fill_random(b_gen); - C.fill_ramp(0,0); - -// // Alternatively, initialize with procedural values to simplify debugging incorrect results -// A.fill_ramp(1,2); -// B.fill_ramp(1,1); - - // Sync to device - A.sync_device(); - B.sync_device(); - C.sync_device(); - - CUDA_PERROR(cudaPeekAtLastError()); - CUDA_PERROR(cudaDeviceSynchronize()); - - // - // Run test once with debug-synchronous enabled and check result - // - - if (!g_schmoo) printf("\n"); - - test_func_t test_func; - - C.fill_ramp(0, 0); - C.sync_device(); - - cudaError_t error = test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - C.d_data(), - alpha, - beta, - stream, - !g_schmoo).result; - - bool not_applicable = (error == cudaErrorInvalidValue); - bool is_failed = false; - if (not_applicable) - { - printf(", NA"); - } - else - { - CUDA_PERROR(error); - - // Compute reference check if wont take too long on CPU - if ((!g_schmoo) && (m * n <= 1024 * 1024)) - { - matrix ref_C(m, n); - ref_C.fill_ramp(0, 0); - ref_C.gemm(TransformA, TransformB, alpha, A, B, beta); - C.sync_host(); - - is_failed = (C != ref_C); - - if (!g_schmoo) - { - if (is_failed) - { - printf("FAIL, "); - std::ofstream file_a("a.csv"); - A.write_matrix(file_a); - std::ofstream file_b("b.csv"); - B.write_matrix(file_b); - std::ofstream file_d("gemm-REF.csv"); - ref_C.write_matrix(file_d); - std::ofstream file_c("gemm-GPU.csv"); - C.write_matrix(file_c); - } - else - { - printf("PASS, "); - } - } - } - fflush(stdout); - - // - // Warmup and timing iterations - // - - if (g_timing_iterations > 0) - { - // Warmup for 1/100 of the timing iterations (minimum of 2) - for (int i = 0; i < __NV_STD_MAX(2, (g_timing_iterations + 99) / 100); ++i) - { - CUDA_PERROR(test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - 
C.d_data(), - alpha, - beta, - stream, - false).result); - } - } - - // Conduct timing iterations - double elapsed_ms = 0; - gpu_timer timer; - timer.start(); - - for (int i = 0; i < g_timing_iterations; i++) - { - CUDA_PERROR(test_func( - g_cublas_handle, - m, - n, - k, - A.d_data(), - B.d_data(), - C.d_data(), - alpha, - beta, - stream, - false).result); - } - - timer.stop(); - elapsed_ms += timer.elapsed_millis(); - double avg_ms = elapsed_ms / g_timing_iterations; - - // Display performance - if (g_timing_iterations > 0) - { - int64_t num_flops = (2 * int64_t(m) * int64_t(n) * int64_t(k)) + (2 * int64_t(m) * int64_t(n)); - double gflops_per_sec = double(num_flops) / avg_ms / 1.0e6; - - if (g_schmoo) - { - if (is_failed) - printf("F"); - - printf(", %.3f", gflops_per_sec); - - // Sleep for a few milliseconds to cool - sleep_millis(10); - } - else - { - printf("Avg runtime: %.3f ms, total flops: %lld, GFLOP/s: %.2f\n", - avg_ms, - num_flops, - gflops_per_sec); - } - fflush(stdout); - } - } - - return is_failed; -} - -/** - * Compute C = (alpha * A * B) + (beta * C) - */ -template < - math_operation_class_t math_op, - matrix_transform_t::kind_t TransformA, ///< Transformation op for matrix A - matrix_transform_t::kind_t TransformB, ///< Transformation op for matrix B - typename value_t, ///< Multiplicand value type (matrices A and B) - typename accum_t> ///< Accumulator value type (matrix C and scalars) -bool test( - int m, ///< Height of C in rows - int n, ///< Width of C in columns - int k, ///< Width (height) of A (B) - accum_t alpha, ///< Multiplicand scalar - accum_t beta) ///< Addend scalar -{ - uint64_t flop_base = 1ull << 41; - int max_timing_iterations = 10000; - int min_timing_iterations = 10; - - bool test_error = false; - - // Scale the number of timing iterations with respect to problem size (if not specified on commandline) - if ((g_timing_iterations < 0) || g_schmoo) - { - uint64_t num_flops = (2 * uint64_t(m) * uint64_t(n) * uint64_t(k)) + (2 * 
uint64_t(m) * uint64_t(n)); - g_timing_iterations = (int) ((flop_base / sizeof(value_t)) / num_flops); - - g_timing_iterations = (int) __NV_STD_MIN(max_timing_iterations, g_timing_iterations); - g_timing_iterations = (int) __NV_STD_MAX(min_timing_iterations, g_timing_iterations); - } - - if (g_schmoo) - { - printf("%d, %d, %d, %c%c, %d, %d", - m, n, k, - (TransformA == matrix_transform_t::NonTranspose) ? 'n' : 't', - (TransformB == matrix_transform_t::NonTranspose) ? 'n' : 't', - m * n, - g_timing_iterations); - } - else - { - printf("\n------------------------------------------------------------\n"); - printf("%dx%dx%d, GEMM_%c%c, %d C elements, %d timing iterations\n", - m, n, k, - (TransformA == matrix_transform_t::NonTranspose) ? 'n' : 't', - (TransformB == matrix_transform_t::NonTranspose) ? 'n' : 't', - m * n, - g_timing_iterations); - } - fflush(stdout); - - // CUBLAS - test_error |= test< - cublas_gemm, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - // CUTLASS - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - test_error |= test< - cutlass_gemm_dispatch, - TransformA, - TransformB, - value_t, - accum_t>(m, n, k, accum_t(alpha), accum_t(beta)); - - return test_error; -} - - - - -/****************************************************************************** - * Main - 
******************************************************************************/ - - -/** - * Main - */ -int main(int argc, const char **argv) -{ - // - // Problem type (compiler-supplied so we don't compile everything) - // - - // Define value_t and accum_t (multiplicand and accumulator types, respectively) -#if defined(TEST_SGEMM) - typedef float value_t; - typedef float accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_DGEMM) - typedef double value_t; - typedef double accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_HGEMM) - typedef __half value_t; - typedef __half accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_IGEMM) - typedef int8_t value_t; - typedef int32_t accum_t; - const math_operation_class_t math_op = math_operation_class_t::scalar; -#elif defined(TEST_WGEMM) - typedef half value_t; - typedef float accum_t; - const math_operation_class_t math_op = math_operation_class_t::matrix; -#else - #error Unknown GEMM type requested. 
-#endif - - - // Define transpose constants -#ifdef TRANSPOSE_A - static const matrix_transform_t::kind_t TransformA = matrix_transform_t::Transpose; -#else - static const matrix_transform_t::kind_t TransformA = matrix_transform_t::NonTranspose; -#endif - -#ifdef TRANSPOSE_B - static const matrix_transform_t::kind_t TransformB = matrix_transform_t::Transpose; -#else - static const matrix_transform_t::kind_t TransformB = matrix_transform_t::NonTranspose; -#endif - - - // - // Commandline parsing - // - - // Initialize command line - command_line args(argc, argv); - - int m_factor = args.device_prop.multiProcessorCount * 128; - int m = round_nearest(4096, m_factor); - int k = 4096; - int n = 4096; - float alpha = 1.0; - float beta = 0.0; - - g_device_id = args.device_id; - args.get_cmd_line_argument("m", m); - args.get_cmd_line_argument("n", n); - args.get_cmd_line_argument("k", k); - args.get_cmd_line_argument("i", g_timing_iterations); - args.get_cmd_line_argument("alpha", alpha); - args.get_cmd_line_argument("beta", beta); - args.get_cmd_line_argument("schmoo", g_schmoo); - - // Print usage - if (args.check_cmd_line_flag("help")) - { - printf("%s " - "[--help] " - "[--i=] " - "[--device=] " - "[--alpha= --beta=] " - "[--schmoo= || --m= --n= --k=]" - "\n", argv[0]); - exit(0); - } - - // Initialize cuBLAS - if (cublasCreate(&g_cublas_handle) != CUBLAS_STATUS_SUCCESS) - { - fprintf(stderr, "cublasCreate() failed\n"); - exit(1); - } - - bool test_error = false; - - if (g_schmoo) - { - // Run a schmoo of problem sizes - printf("M, N, K, transpose, total_flops, timing_iterations, sol_flop/s, cublas_sol, cutlass_small_sol, cutlass_med_sol, cutlass_large_sol, cutlass_tall_sol, cutlass_wide_sol, cutlass_huge_sol\n"); - - // Generate power-law distribution from [32, 16384) - std::mt19937 gen(0); - std::uniform_real_distribution dis(5, 14); - for (int i = 0; i < g_schmoo; ++i) - { - int m = int(pow(float(2), dis(gen))); - int n = int(pow(float(2), dis(gen))); - int k = 
int(pow(float(2), dis(gen))); - - // Round m and n to nearest multiple of 32 if < 128, otherwise to the nearest 128 - m = (m < 128) ? - round_nearest(m, 32) : - round_nearest(m, 128); - n = (n < 128) ? - round_nearest(n, 32) : - round_nearest(n, 128); - - // Round k to the nearest 16 - k = (sizeof(value_t) == 1) ? - round_nearest(k, 32) : - round_nearest(k, 16); - - test_error |= test( - m, n, k, - from_float(alpha), - from_float(beta)); - - printf("\n"); fflush(stdout); - } - } - else - { - // Test a single GEMM problem size - test_error |= test( - m, - n, - k, - from_float(alpha), - from_float(beta)); - } - - // Cleanup - cublasDestroy(g_cublas_handle); - - return test_error; -} - diff --git a/cutlass_test/util/command_line.h b/cutlass_test/util/command_line.h deleted file mode 100644 index 9bdc99d9d9..0000000000 --- a/cutlass_test/util/command_line.h +++ /dev/null @@ -1,320 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - - -#pragma once - -/** - * \file - * Utility for parsing command line arguments - */ - -#include -#include -#include -#include -#include - -#include -#include - - -namespace cutlass { - -/****************************************************************************** - * command_line - ******************************************************************************/ - -/** - * Utility for parsing command line arguments - */ -struct command_line -{ - - std::vector keys; - std::vector values; - std::vector args; - int device_id; - cudaDeviceProp device_prop; - float device_giga_bandwidth; - size_t device_free_physmem; - size_t device_total_physmem; - - /** - * Constructor - */ - command_line(int argc, const char **argv, int device_id = -1) : - keys(10), - values(10), - device_id(device_id) - { - using namespace std; - - for (int i = 1; i < argc; i++) - { - string arg = argv[i]; - - if ((arg[0] != '-') || (arg[1] != '-')) - { - args.push_back(arg); - continue; - } - - string::size_type pos; - string key, val; - if ((pos = arg.find('=')) == string::npos) { - key = string(arg, 2, arg.length() - 2); - val = ""; - } else 
{ - key = string(arg, 2, pos - 2); - val = string(arg, pos + 1, arg.length() - 1); - } - - keys.push_back(key); - values.push_back(val); - } - - // Initialize device - CUDA_PERROR_EXIT(device_init()); - } - - - /** - * Checks whether a flag "--" is present in the commandline - */ - bool check_cmd_line_flag(const char* arg_name) - { - using namespace std; - - for (int i = 0; i < int(keys.size()); ++i) - { - if (keys[i] == string(arg_name)) - return true; - } - return false; - } - - - /** - * Returns number of naked (non-flag and non-key-value) commandline parameters - */ - template - int num_naked_args() - { - return args.size(); - } - - - /** - * Returns the commandline parameter for a given index (not including flags) - */ - template - void get_cmd_line_argument(int index, value_t &val) - { - using namespace std; - if (index < args.size()) { - istringstream str_stream(args[index]); - str_stream >> val; - } - } - - /** - * Returns the value specified for a given commandline parameter --= - */ - template - void get_cmd_line_argument(const char *arg_name, value_t &val) - { - using namespace std; - - for (int i = 0; i < int(keys.size()); ++i) - { - if (keys[i] == string(arg_name)) - { - istringstream str_stream(values[i]); - str_stream >> val; - } - } - } - - - /** - * Returns the values specified for a given commandline parameter --=,* - */ - template - void get_cmd_line_arguments( - const char *arg_name, - std::vector &vals, - char sep = ',') - { - using namespace std; - - if (check_cmd_line_flag(arg_name)) - { - // Clear any default values - vals.clear(); - - // Recover from multi-value string - for (int i = 0; i < keys.size(); ++i) - { - if (keys[i] == string(arg_name)) - { - string val_string(values[i]); - istringstream str_stream(val_string); - string::size_type old_pos = 0; - string::size_type new_pos = 0; - - // Iterate -delimited values - value_t val; - while ((new_pos = val_string.find(sep, old_pos)) != string::npos) - { - if (new_pos != old_pos) - { - 
str_stream.width(new_pos - old_pos); - str_stream >> val; - vals.push_back(val); - } - - // skip over delimiter - str_stream.ignore(1); - old_pos = new_pos + 1; - } - - // Read last value - str_stream >> val; - vals.push_back(val); - } - } - } - } - - - /** - * The number of pairs parsed - */ - int parsed_argc() - { - return (int) keys.size(); - } - - /** - * Initialize device - */ - cudaError_t device_init() - { - cudaError_t error = cudaSuccess; - - do - { - int deviceCount; - if (CUDA_PERROR(error = cudaGetDeviceCount(&deviceCount))) break; - - if (deviceCount == 0) { - fprintf(stderr, "No devices supporting CUDA.\n"); - exit(1); - } - if (device_id < 0) - { - get_cmd_line_argument("device", device_id); - } - if ((device_id > deviceCount - 1) || (device_id < 0)) - { - device_id = 0; - } - - if (CUDA_PERROR(error = cudaSetDevice(device_id))) break; - - if (CUDA_PERROR(error = cudaMemGetInfo(&device_free_physmem, &device_total_physmem))) break; - - if (CUDA_PERROR(error = cudaGetDeviceProperties(&device_prop, device_id))) break; - - if (device_prop.major < 1) { - fprintf(stderr, "Device does not support CUDA.\n"); - exit(1); - } - - device_giga_bandwidth = float(device_prop.memoryBusWidth) * device_prop.memoryClockRate * 2 / 8 / 1000 / 1000; - - } while (0); - - return error; - } - - - //------------------------------------------------------------------------- - // Utility functions - //------------------------------------------------------------------------- - - /// Tokenizes a comma-delimited list of string pairs delimited by ':' - static void tokenize( - std::vector > &tokens, - std::string const &str, - char delim = ',', - char sep = ':') - { - // Home-built to avoid Boost dependency - size_t s_idx = 0; - size_t d_idx = std::string::npos; - while (s_idx < str.size()) - { - d_idx = str.find_first_of(delim, s_idx); - - size_t end_idx = (d_idx != std::string::npos ? 
d_idx : str.size()); - size_t sep_idx = str.find_first_of(sep, s_idx); - size_t offset = 1; - if (sep_idx == std::string::npos || sep_idx >= end_idx) - { - sep_idx = end_idx; - offset = 0; - } - - std::pair item( - str.substr(s_idx, sep_idx - s_idx), - str.substr(sep_idx + offset, end_idx - sep_idx - offset)); - - tokens.push_back(item); - s_idx = end_idx + 1; - } - } - - /// Tokenizes a comma-delimited list of string pairs delimited by ':' - static void tokenize( - std::vector &tokens, - std::string const &str, - char delim = ',', - char sep = ':') - { - std::vector > token_pairs; - tokenize(token_pairs, str, delim, sep); - for (auto const &tok : token_pairs) - { - tokens.push_back(tok.first); - } - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/util/exceptions.h b/cutlass_test/util/exceptions.h deleted file mode 100644 index 7f12e69c87..0000000000 --- a/cutlass_test/util/exceptions.h +++ /dev/null @@ -1,91 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief C++ exception semantics for CUDA error codes - */ - -#include -#include - - -namespace cutlass { - - -/// C++ exception wrapper for CUDA \p cudaError_t -class cuda_exception : public std::exception -{ -public: - - /// Constructor - cuda_exception( - const char *msg = "", - cudaError_t err = cudaErrorUnknown) - : - msg(msg), err(err) - {} - - /// Returns the explanatory string - const char *what() const noexcept - { - return msg; - } - - /// Returns the underlying CUDA \p cudaError_t - cudaError_t cudaError() const - { - return err; - } - - -protected: - - /// Explanatory string - const char *msg; - - /// Underlying CUDA \p cudaError_t - cudaError_t err; -}; - - -/// Writes a cudaError_t to an output stream -inline std::ostream & operator<<(std::ostream &out, cudaError_t result) -{ - return out << cudaGetErrorString(result); -} - -/// Writes a cuda_exception instance to an output stream -inline std::ostream & operator<<(std::ostream &out, cuda_exception const &e) -{ - return out << e.what() << ": " << e.cudaError(); -} - - -} // namespace cutlass diff --git 
a/cutlass_test/util/half.h b/cutlass_test/util/half.h deleted file mode 100644 index cef2ccecc1..0000000000 --- a/cutlass_test/util/half.h +++ /dev/null @@ -1,231 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ -#pragma once - -/** - * \file - * Utilities for interacting with the opaque CUDA __half type - */ - -#include -#include -#include - -namespace cutlass { - - -/****************************************************************************** - * half_t - ******************************************************************************/ - -/** - * Host-based fp16 data type compatible and convertible with __half - */ -struct half_t -{ - uint16_t __x; - - /// Constructor from __half - half_t(const __half &other) - { - __x = reinterpret_cast(other); - } - - /// Constructor from integer - half_t(int a) - { - *this = half_t(float(a)); - } - - - /// Constructor from float - half_t(float a) - { - uint32_t ia = *reinterpret_cast(&a); - uint16_t ir; - - ir = (ia >> 16) & 0x8000; - - if ((ia & 0x7f800000) == 0x7f800000) - { - if ((ia & 0x7fffffff) == 0x7f800000) - { - ir |= 0x7c00; /* infinity */ - } - else - { - ir = 0x7fff; /* canonical NaN */ - } - } - else if ((ia & 0x7f800000) >= 0x33000000) - { - int32_t shift = (int32_t) ((ia >> 23) & 0xff) - 127; - if (shift > 15) - { - ir |= 0x7c00; /* infinity */ - } - else - { - ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */ - if (shift < -14) - { /* denormal */ - ir |= ia >> (-1 - shift); - ia = ia << (32 - (-1 - shift)); - } - else - { /* normal */ - ir |= ia >> (24 - 11); - ia = ia << (32 - (24 - 11)); - ir = ir + ((14 + shift) << 10); - } - /* IEEE-754 round to nearest of even */ - if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) - { - ir++; - } - } - } - - this->__x = ir; - } - - /// Cast to __half - operator __half() const - { - return reinterpret_cast(__x); - } - - /// Cast to float - operator float() const - { - int sign = ((this->__x >> 15) & 1); - int exp = ((this->__x >> 10) & 0x1f); - int mantissa = (this->__x & 0x3ff); - uint32_t f = 0; - - if (exp > 0 && exp < 31) - { - // normal - exp += 112; - f = (sign << 31) | 
(exp << 23) | (mantissa << 13); - } - else if (exp == 0) - { - if (mantissa) - { - // subnormal - exp += 113; - while ((mantissa & (1 << 10)) == 0) - { - mantissa <<= 1; - exp--; - } - mantissa &= 0x3ff; - f = (sign << 31) | (exp << 23) | (mantissa << 13); - } - else - { - // zero - f = 0; - } - } - else if (exp == 31) - { - if (mantissa) - { - f = 0x7fffffff; // not a number - } - else - { - f = (0xff << 23) | (sign << 31); // inf - } - } - return *reinterpret_cast(&f); - } - - - /// Get raw storage - uint16_t raw() - { - return this->__x; - } - - /// Assignment by sum - bool operator ==(const half_t &other) - { - return (this->__x == other.__x); - } - - /// Increment - half_t& operator +=(const half_t &rhs) - { - *this = half_t(float(*this) + float(rhs)); - return *this; - } - - /// Decrement - half_t& operator -=(const half_t &rhs) - { - *this = half_t(float(*this) - float(rhs)); - return *this; - } - - /// Multiply - half_t operator*(const half_t &other) - { - return half_t(float(*this) * float(other)); - } - - /// Multiply - half_t operator+(const half_t &other) - { - return half_t(float(*this) + float(other)); - } - -}; - - -/****************************************************************************** - * I/O stream overloads - ******************************************************************************/ - -/// Insert formatted \p half_t into the output stream -std::ostream& operator<<(std::ostream &out, const half_t &x) -{ - out << (float)x; - return out; -} - - -/// Insert formatted \p __half into the output stream -std::ostream& operator<<(std::ostream &out, const __half &x) -{ - return out << half_t(x); -} - - -} // namespace cutlass diff --git a/cutlass_test/util/matrix.h b/cutlass_test/util/matrix.h deleted file mode 100644 index 7ae080c892..0000000000 --- a/cutlass_test/util/matrix.h +++ /dev/null @@ -1,503 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. 
All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * Matrix data structure providing basic CPU-based algorithms and - * operations that can be cloned and synchronized in GPU device memory - */ - -#include -#include - -#include -#include "../cutlass/util/matrix_transform.h" -#include "half.h" - - -namespace cutlass { - -/** - * \brief Matrix data structure providing basic CPU-based algorithms and - * operations that be synchronized with a GPU-based replica - */ -template -struct matrix -{ - // Host value type (must be convertible to/from value_t) - typedef typename nv_std::conditional< - (nv_std::is_same::value), // If (value_t == __half) ... - half_t, // ... use half_t internally for host storage, else... - value_t>::type // ... use value_t directly - host_value_t; - - - //----------------------------------------------------------------------------- - // Data members - //----------------------------------------------------------------------------- - -private: - - /// M dimension (height in rows) - int _m; - - /// N dimension (width in columns) - int _n; - - /// Data array on host - std::vector _h_data; - - /// Clone of data array on GPU device - value_t *_d_data; - - /// GPU Device identifier that clone synchronizes with - int _device_id; - -public: - - //----------------------------------------------------------------------------- - // Lifetime and synchronization - //----------------------------------------------------------------------------- - - /** - * Constructor: zero-initializes the matrix. 
- */ - matrix( - int m, ///< Height of the matrix in rows - int n) ///< Width of the matrix in columns - : - _m(m), - _n(n), - _d_data(NULL), - _device_id(0) - { - _h_data.resize(_m * _n, 0); - CUDA_PERROR_EXIT(cudaMalloc((void ** )&_d_data, sizeof(value_t) * _m * _n)); - CUDA_PERROR_EXIT(cudaGetDevice(&_device_id)); - } - - /// Destructor - ~matrix() - { - if (_d_data) - { - CUDA_PERROR_EXIT(cudaFree(_d_data)); - } - } - - /** - * Synchronize the GPU-based replica with the current host-based matrix data - */ - void sync_device() - { - size_t bytes = _m * _n * sizeof(value_t); - CUDA_PERROR_EXIT(cudaMemcpy(_d_data, &_h_data[0], bytes, cudaMemcpyHostToDevice)); - } - - - /** - * Synchronize the host-based replica with the current GPU-based matrix data - */ - void sync_host() - { - size_t bytes = _m * _n * sizeof(value_t); - CUDA_PERROR_EXIT(cudaMemcpy(&_h_data[0], _d_data, bytes, cudaMemcpyDeviceToHost)); - } - - - //----------------------------------------------------------------------------- - // Inspectors - //----------------------------------------------------------------------------- - - /** - * Return the height of the matrix, subject to the optional \p transpose_op - */ - int height(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _m; - case matrix_transform_t::Transpose : return _n; - default: return -1; - } - } - - - /** - * Return the width of the matrix, subject to the optional \p transpose_op - */ - int width(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _n; - case matrix_transform_t::Transpose : return _m; - default: return -1; - } - } - - - /** - * Return item at (x, y) coordinate of matrix, subject to the optional \p transform op - */ - host_value_t get( - int x, - int y, - matrix_transform_t transpose_op = 
matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _h_data[y + (x * _m)]; - case matrix_transform_t::Transpose : return _h_data[x + (y * _m)]; - default: return 0; - } - } - - - /** - * Return the distance (in items) within memory between elements of two - * consecutive columns which have the same row index, subject to the optional \p transform op - */ - int leading_dim(matrix_transform_t transpose_op = matrix_transform_t::NonTranspose) const - { - switch (transpose_op) - { - case matrix_transform_t::NonTranspose : return _m; - case matrix_transform_t::Transpose : return _n; - default: return 0; - } - } - - /** - * Get host data pointer - */ - value_t* h_data() - { - return _h_data.data(); - } - - - /** - * Get host data pointer - */ - value_t const* h_data() const - { - return _h_data.data(); - } - - /** - * Get device data pointer - */ - value_t const* d_data() const - { - return _d_data; - } - - /** - * Get device data pointer - */ - value_t * d_data() - { - return _d_data; - } - - //----------------------------------------------------------------------------- - // Initialization - //----------------------------------------------------------------------------- - - /** - * Initialize matrix values with a 2D "ramp" defined as - * values(x, y) = (y * rs) + (x * cs) - */ - void fill_ramp( - host_value_t rs, - host_value_t cs) - { - for (int x = 0; x < _n; x++) - { - for (int y = 0; y < _m; y++) - { - _h_data[y + (x * _m)] = host_value_t((y * rs) + (x * cs)); - } - } - } - - - /** - * Initialize matrix values such that all the elements of the principal diagonal - * are ones and all other elements are zeros - */ - void fill_identity() - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] = host_value_t(i == j ? 1 : 0); - } - } - } - - - /** - * Initialize matrix values using the random number \p generator. 
The - * \p generator reference is assumed to be a nullary functor that returns - * values convertible to the matrix \p value_t. - */ - template - void fill_random(T & generator) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] = (value_t) generator(); - } - } - } - - - /** - * Element-wise matrix addition - */ - matrix & operator+=(matrix const &mat) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] += mat._h_data[i + j * _m]; - } - } - return *this; - } - - /** - * Element-wise matrix subtraction - */ - matrix & operator-=(matrix const &mat) - { - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - _h_data[i + j * _m] -= mat._h_data[i + j * _m]; - } - } - return *this; - } - - //----------------------------------------------------------------------------- - // Output - //----------------------------------------------------------------------------- - - /** - * Prints matrix in CSV to output stream - */ - template - std::ostream & write_matrix(std::ostream &out, _hv_t) - { - for (int i = 0; i < _m; i++) - { - for (int j = 0; j < _n; j++) - { - out << (j ? "," : "") << _h_data[i + j * _m]; - } - out << "\n"; - } - return out; - } - - - /** - * Prints matrix in CSV to output stream - */ - std::ostream & write_matrix(std::ostream &out, int8_t) - { - for (int i = 0; i < _m; i++) - { - for (int j = 0; j < _n; j++) - { - out << (j ? 
"," : "") << int32_t(_h_data[i + j * _m]); - } - out << "\n"; - } - return out; - } - - - /** - * Prints matrix in CSV to output stream - */ - std::ostream & write_matrix(std::ostream &out) - { - return write_matrix(out, _h_data[0]); - } - - - //----------------------------------------------------------------------------- - // Floating point "almost-equal" utilities - //----------------------------------------------------------------------------- - - static bool almost_equal_ulps(half_t a, half_t b, int max_ulps) - { - if (a == b) - return true; - - int32_t int_diff = abs(a.raw() - b.raw()); - if (int_diff <= max_ulps) - return true; - return false; - } - - - static bool almost_equal_ulps(float a, float b, int max_ulps) - { - if (a == b) - return true; - int32_t int_diff = abs(*(int32_t*)&a - *(int32_t*)&b); - if (int_diff <= max_ulps) - return true; - return false; - } - - - static bool almost_equal_ulps(double a, double b, int max_ulps) - { - if (a == b) - return true; - int64_t int_diff = abs(*(int64_t*)&a - *(int64_t*)&b); - if (int_diff <= max_ulps) - return true; - return false; - } - - static bool almost_equal_ulps(int32_t a, int32_t b, int max_ulps) - { - return (a == b); - } - - - //----------------------------------------------------------------------------- - // matrix operations - //----------------------------------------------------------------------------- - - - /** - * Returns matrix equality - */ - bool operator==(const matrix &mat) const - { - int max_ulps = 30; - - if (_m != mat._m || _n != mat._n) - { - fprintf(stderr, "Error: dimension mismatch during matrix comparison.\n"); exit(1); - } - - for (int j = 0; j < _n; j++) - { - for (int i = 0; i < _m; i++) - { - if (!almost_equal_ulps(_h_data[i + j * _m], mat._h_data[i + j * _m], max_ulps)) - { - return false; - } - } - } - return true; - } - - - /** - * Returns matrix inequality - */ - bool operator!=(const matrix &mat) const - { - return !(*this == mat); - } - - - /** - * Computes this = (alpha 
* op(A) * op(B)) + (beta * this), specialized for gemm_nn - */ - template - void gemm( - matrix_transform_t transform_a, - matrix_transform_t transform_b, - host_value_t alpha, - const matrix &A, - const matrix &B, - host_value_t beta) - { - // Sanity check dimensions - if ((_m != A.height(transform_a)) || - (_n != B.width(transform_b)) || - (A.width(transform_a) != B.height(transform_b))) - { - fprintf(stderr, "Error: dimension mismatch during gemm.\n"); - exit(1); - } - - int M = A.height(transform_a); - int K = A.width(transform_a); - int N = B.width(transform_b); - - // Even the host-side implementation utilizes a blocking structure to improve - // verification performance - int DimBlockM = (M % 16 == 0) ? 16 : 1; - int DimBlockN = (N % 16 == 0) ? 16 : 1; - - for (int i = 0; i < M; i += DimBlockM) - { - for (int j = 0; j < N; j += DimBlockN) - { - for (int block_y = 0; block_y < DimBlockM; block_y++) - { - for (int block_x = 0; block_x < DimBlockN; block_x++) - { - int y = i + block_y; - int x = j + block_x; - - host_value_t accum(0); - for (int k = 0; k < K; k++) - { - accum += host_value_t(A.get(k, y, transform_a)) * host_value_t(B.get(x, k, transform_b)); - } - - _h_data[y + x * M] = (alpha * accum) + (beta * _h_data[y + x * M]); - } - } - } - } - } -}; - - -} // namespace cutlass diff --git a/cutlass_test/util/timer.h b/cutlass_test/util/timer.h deleted file mode 100644 index b13db97689..0000000000 --- a/cutlass_test/util/timer.h +++ /dev/null @@ -1,107 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * GPU kernel timer - */ - -#include - -#include - -namespace cutlass { - - -/****************************************************************************** - * gpu_timer - ******************************************************************************/ - -/** - * GPU event-based timer - */ -struct gpu_timer -{ - cudaEvent_t _start; - cudaEvent_t _stop; - - gpu_timer() - { - CUDA_PERROR_EXIT(cudaEventCreate(&_start)); - CUDA_PERROR_EXIT(cudaEventCreate(&_stop)); - } - - ~gpu_timer() - { - CUDA_PERROR_EXIT(cudaEventDestroy(_start)); - CUDA_PERROR_EXIT(cudaEventDestroy(_stop)); - } - - void start() - { - CUDA_PERROR_EXIT(cudaEventRecord(_start, 0)); - } - - void stop() - { - CUDA_PERROR_EXIT(cudaEventRecord(_stop, 0)); - } - - float elapsed_millis() - { - float elapsed = 0.0; - CUDA_PERROR_EXIT(cudaEventSynchronize(_stop)); - CUDA_PERROR_EXIT(cudaEventElapsedTime(&elapsed, _start, _stop)); - return elapsed; - } -}; - - -/****************************************************************************** - * sleep_millis - ******************************************************************************/ - -#ifdef _WIN32 - #include - - void sleep_millis(unsigned milliseconds) - { - Sleep(milliseconds); - } -#else - #include - - void sleep_millis(unsigned milliseconds) - { - usleep(milliseconds * 1000); // takes microseconds - } -#endif - - -} // namespace cutlass diff --git a/cutlass_test/util/type_conversion.h b/cutlass_test/util/type_conversion.h deleted file mode 100644 index fd55ff65da..0000000000 --- a/cutlass_test/util/type_conversion.h +++ /dev/null @@ -1,163 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - ******************************************************************************/ - -#pragma once - -/** - * \file - * \brief Utilities for converting between types and assessing traits - */ - -#include "half.h" - -namespace cutlass { - -/****************************************************************************** - * Float conversion utilities - ******************************************************************************/ - -/// Convert float to value type -template -value_t from_float(float val) -{ - return value_t(val); -} - -/// Convert float to value type (__half specialization) -template <> -__half from_float<__half>(float val) -{ - return half_t(val); -} - - -/****************************************************************************** - * Type conversion utilities - ******************************************************************************/ - -/// Member \p type is defined as the signed integer type having the same size as \p T -template -struct integer_alias; - -template <> -struct integer_alias { - using type = int8_t; -}; - -template <> -struct integer_alias { - using type = int16_t; -}; - -template <> -struct integer_alias<__half> { - using type = int16_t; -}; - -template <> -struct integer_alias { - using type = int32_t; -}; - -template <> -struct integer_alias { - using type = int32_t; -}; - -template <> -struct integer_alias { - using type = int64_t; -}; - - - -/****************************************************************************** - * Type-info utilities - ******************************************************************************/ - -/// Returns a string to prefix 'gemm' to construct CUBLAS-like kernel names -template char const *to_prefix_string(); - -template <> char const *to_prefix_string() { - return "H"; -} - -template <> char const *to_prefix_string() { - return "H"; -} - -template <> char const *to_prefix_string() { - return "S"; -} - -template <> char const *to_prefix_string() { - return "WmmaH"; -} - -template <> 
char const *to_prefix_string() { - return "WmmaS"; -} - -template <> char const *to_prefix_string() { - return "D"; -} - -template <> char const *to_prefix_string() { - return "I"; -} - - -/****************************************************************************** - * Maps value_t to the minimum vector size used to load operand - ******************************************************************************/ - -template -struct operand_load_type; - -template <> -struct operand_load_type { using type = int32_t; }; - -template -struct operand_load_type { using type = T; }; - - -/****************************************************************************** - * Minimum alignment requirement, if any, determined from value_t. - ******************************************************************************/ - -template -struct gemm_alignment_requirement; - -template <> -struct gemm_alignment_requirement { static const int value = 4; }; - -template -struct gemm_alignment_requirement { static const int value = 0; }; - - - -} // namespace cutlass diff --git a/docs/generated-html/annotated.html b/docs/generated-html/annotated.html new file mode 100644 index 0000000000..e6c405d597 --- /dev/null +++ b/docs/generated-html/annotated.html @@ -0,0 +1,378 @@ + + + + + + + +Cutlass: Class List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class List
+
+
+
Here are the classes, structs, unions and interfaces with brief descriptions:
+
[detail level 1234]

 Ncutlass
 Ngemm
 Nplatform
 CAlignedStruct
 CComputeOffsetFromShapeCompute the offset for the given coordinates in a cube
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeOffsetFromStridesCompute the offset for the given coordinates in a cube
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 CComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 CComputeThreadOffsetFromStridesDecompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
 CComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
 CConstPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CConvert
 CConvert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 CCoordStatically-sized array specifying Coords within a tensor
 CCopy
 Cdivide_assert
 CExtentReturns the extent of a scalar or vector
 CExtent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
 CExtent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
 CFragmentA template defining Fragment Concept
 CFragmentConstIterator
 CFragmentIteratorA template defining Fragment Iterator Concept
 CFragmentLoad
 CFragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore
 CFragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CFragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 CGemmOperandGemm operand - D = A * B + C
 CIdentityDescribes identity elements
 Cis_pow2
 CIteratorAdvanceSpecifies dimension in which post-increment accesses advance
 CIteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
 CLoad
 CLoad< double, 2, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 16 >
 CLoad< Scalar_, Lanes_, Memory_, true, 4 >
 CLoad< Scalar_, Lanes_, Memory_, true, 8 >
 Clog2_down
 Clog2_down< N, 1, Count >
 Clog2_up
 Clog2_up< N, 1, Count >
 CMatrixLayoutDescribes layouts of matrices
 CMemorySpaceEnum to specify which memory space data resides in
 CPredicateTileAdapterAdapter to enable random access to predicates via logical coordinate within a tile
 CPredicateVectorStatically sized array of bits implementing
 CReshapeTile
 CReshapeTile< Tile_, kAccessSize_, true >
 CShapeA Shape implementing Layout Concept describing the dimensions of a cube
 CShapeAdd
 CShapeCountCompute derived counted of a Layout Concept based class
 CShapeDiv
 CShapeMax
 CShapeMin
 CShapeMul
 CShapeScale
 CShapeStrides
 CShapeSub
 Csqrt_est
 CStorageType
 CStorageType< 1 >
 CStorageType< 2 >
 CStorageType< 4 >
 CStore
 CStore< double, 2, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 16 >
 CStore< Scalar_, Lanes_, Memory_, true, 4 >
 CStore< Scalar_, Lanes_, Memory_, true, 8 >
 CTensorRefStructure modeling a pointer and stride into a tensor
 CTensorViewHost-side reference implementation of tensor operations
 CTiledThreadOffsetBasic thread offset function computed from a thread shape
 CTileIteratorBaseIterator for accessing a stripmined tile in memory
 CTileLoadIteratorAn iterator implementing Tile Load Iterator Concept for loading a tile from memory
 CTileStoreIteratorAn iterator implementing Tile Store Iterator Concept for storing a tile to memory
 CTileTraitsA template defining Tile Traits Concept
 CTileTraitsContiguousMajor
 CTileTraitsStandardChooses 'best' shape to enable warp raking along contiguous dimension if possible
 CTileTraitsStrideMajor
 CTileTraitsWarpRakeTiling in which warps rake across the contiguous dimension
 CTrivialPredicateTileAdapterAlways returns true predicate
 CVector
 CVector< half, kLanes_ >
 CVectorize
 CVectorize< Element_, 1 >
 CVectorTraitsTraits describing properties of vectors and scalar-as-vectors
 CVectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
 CVectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
+
+
+ + + + diff --git a/docs/generated-html/bc_s.png b/docs/generated-html/bc_s.png new file mode 100644 index 0000000000000000000000000000000000000000..c3e55261276583d424419224b644eab1e6e21552 GIT binary patch literal 682 zcmV;b0#*HqP)z!w1EKx!J+S4bu+H4%{}zQ=+9zya^|pIIYPbEnr?`jd@|$JK#Zj&L`4 zT8}}WtK3D2bUdPd=rR(7Kk@3<)c?N#v;zRZ0q?vyrAHAFBl*fQBA`$iQA5MoCbu`N z)8niLnv)XIEmDg&`QyKb*&RCiPGs_YBG3_^QlYWDFsg=3jiEs{n>p=d0HcQ<7p!Ra5!YQxa%DjPk16eT!Yu2>;?c1 zp**MGMg&viWPIcWUVB_;EUuNPwcYJlGQ(D5qL`MQz+s zp~nquWsOymQ>@4d1TS`P(_@=rFDMnMH4vK~yTlPLbZ=e2e(h79?AOOdvBX%MBb@K5 zK5DbLYi*J;#ESI*zyZ&7R)sxsky$`m8%u=KcQ+bk(wadK;-BMVe-(B64MT~z)I8V> Q+yDRo07*qoM6N<$f)0!?h5!Hn literal 0 HcmV?d00001 diff --git a/docs/generated-html/bdwn.png b/docs/generated-html/bdwn.png new file mode 100644 index 0000000000000000000000000000000000000000..cb6ebb167f56a9ff6e47c34aba0a27b545be3533 GIT binary patch literal 147 zcmeAS@N?(olHy`uVBq!ia0vp^>_E)H!3HEvS)PKZC{Gv1kP61Pb5HX&C>=yU8{DlC(~a0loEPc7NxAT_YC{K>^~DI?Vx@3tK_ZbN6qu6 wEfk(&7dBs%_59MxzT*A&H*%P4;6BK@ + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html new file mode 100644 index 0000000000..1fbdc759c7 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1ConstIterator.html @@ -0,0 +1,389 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator Class Reference
+
+
+ +

A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to prediactes. +

+ +

#include <predicate_vector.h>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE ConstIterator (ConstIterator const &it)
 Copy constructor. More...
 
CUTLASS_HOST_DEVICE ConstIterator (PredicateVector const &_vec, int _start=0)
 
CUTLASS_HOST_DEVICE ConstIteratoroperator++ ()
 Pre-increment. More...
 
CUTLASS_HOST_DEVICE ConstIteratoroperator-- ()
 Pre-decrement. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator++ (int)
 Post-increment. More...
 
CUTLASS_HOST_DEVICE ConstIterator operator-- (int)
 Post-decrement. More...
 
CUTLASS_HOST_DEVICE bool operator== (ConstIterator const &it) const
 Returns true if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator!= (ConstIterator const &it) const
 Returns false if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator* () const
 Dereferences iterator. More...
 
+

Constructor & Destructor Documentation

+ +

◆ ConstIterator() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::ConstIterator (ConstIterator const & it)
+
+inline
+
+ +
+
+ +

◆ ConstIterator() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::ConstIterator (PredicateVector const & _vec,
int _start = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ operator!=()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator!= (ConstIterator const & it) const
+
+inline
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator==()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator::operator== (ConstIterator const & it) const
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html new file mode 100644 index 0000000000..ca3ff04aa1 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html new file mode 100644 index 0000000000..42a0693823 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1PredicateVector_1_1Iterator.html @@ -0,0 +1,451 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Class Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator Class Reference
+
+
+ +

An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. +

+ +

#include <predicate_vector.h>

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE Iterator (Iterator const &it)
 Copy constructor. More...
 
CUTLASS_HOST_DEVICE Iterator (PredicateVector &_vec, int _start=0)
 Constructs an iterator from a PredicateVector. More...
 
CUTLASS_HOST_DEVICE Iterator & operator++ ()
 Pre-increment. More...
 
CUTLASS_HOST_DEVICE Iterator & operator-- ()
 Pre-decrement. More...
 
CUTLASS_HOST_DEVICE Iterator operator++ (int)
 Post-increment. More...
 
CUTLASS_HOST_DEVICE Iterator operator-- (int)
 Post-decrement. More...
 
CUTLASS_HOST_DEVICE bool operator== (Iterator const &it) const
 Returns true if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool operator!= (Iterator const &it) const
 Returns false if iterators point to the same bit. More...
 
CUTLASS_HOST_DEVICE bool get ()
 Gets the bit at the pointed to location. More...
 
CUTLASS_HOST_DEVICE bool operator* () const
 Dereferences iterator. More...
 
CUTLASS_HOST_DEVICE void set (bool value=true)
 Sets the bit at the pointed to location. More...
 
+

Constructor & Destructor Documentation

+ +

◆ Iterator() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::Iterator (Iterator const & it)
+
+inline
+
+ +
+
+ +

◆ Iterator() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::Iterator (PredicateVector & _vec,
int _start = 0 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::get ()
+
+inline
+
+ +
+
+ +

◆ operator!=()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator!= (Iterator const & it) const
+
+inline
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator++() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Iterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator++ ()
+
+inline
+
+ +
+
+ +

◆ operator++() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator++ (int )
+
+inline
+
+ +
+
+ +

◆ operator--() [1/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Iterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator-- ()
+
+inline
+
+ +
+
+ +

◆ operator--() [2/2]

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator-- (int )
+
+inline
+
+ +
+
+ +

◆ operator==()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::operator== (Iterator const & it) const
+
+inline
+
+ +
+
+ +

◆ set()

+ +
+
+
+template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator::set (bool value = true)
+
+inline
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorRef-members.html b/docs/generated-html/classcutlass_1_1TensorRef-members.html new file mode 100644 index 0000000000..4bf37ad133 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorRef-members.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorRef< Storage_, Rank_ > Member List
+
+
+ +

This is the complete list of members for cutlass::TensorRef< Storage_, Rank_ >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< Storage_, Rank_ >inline
at(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
at(int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
convert()cutlass::TensorRef< Storage_, Rank_ >inline
data() constcutlass::TensorRef< Storage_, Rank_ >inline
good() constcutlass::TensorRef< Storage_, Rank_ >inline
leading_dim() constcutlass::TensorRef< Storage_, Rank_ >inline
offset(Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator+(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorRef< Storage_, Rank_ >inline
operator[](int idx) constcutlass::TensorRef< Storage_, Rank_ >inline
Rankcutlass::TensorRef< Storage_, Rank_ >static
reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< Storage_, Rank_ >inline
Storage typedefcutlass::TensorRef< Storage_, Rank_ >
stride() constcutlass::TensorRef< Storage_, Rank_ >inline
stride(int dim) constcutlass::TensorRef< Storage_, Rank_ >inline
TensorRef()cutlass::TensorRef< Storage_, Rank_ >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< Storage_, Rank_ >inline
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorRef.html b/docs/generated-html/classcutlass_1_1TensorRef.html new file mode 100644 index 0000000000..05a9b3dd52 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorRef.html @@ -0,0 +1,704 @@ + + + + + + + +Cutlass: cutlass::TensorRef< Storage_, Rank_ > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorRef< Storage_, Rank_ > Class Template Reference
+
+
+ +

Structure modeling a pointer and stride into a tensor. +

+ +

#include <tensor_ref.h>

+ + + + + +

+Public Types

typedef Storage_ Storage
 Data type of individual access. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
template<typename T >
TensorRef< T, Rank > convert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storage * data () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storage & at (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
Storage & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE Storage & at (int idx) const
 Returns a reference to the element at a given index. More...
 
Storage & operator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRef & advance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
+ + + + +

+Static Public Attributes

static int const Rank = Rank_
 Rank of tensor. More...
 
+

Member Typedef Documentation

+ +

◆ Storage

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + +
typedef Storage_ cutlass::TensorRef< Storage_, Rank_ >::Storage
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ TensorRef() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef ()
+
+inline
+
+ +
+
+ +

◆ TensorRef() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorRef< Storage_, Rank_ >::TensorRef (Storage * ptr,
Coord< Rank > stride 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ advance()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef& cutlass::TensorRef< Storage_, Rank_ >::advance (Coord< Rank > const & b)
+
+inline
+
+ +
+
+ +

◆ at() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Storage& cutlass::TensorRef< Storage_, Rank_ >::at (int idx) const
+
+inline
+
+ +
+
+ +

◆ convert()

+ +
+
+
+template<typename Storage_, int Rank_>
+
+template<typename T >
+ + + + + +
+ + + + + + + +
TensorRef<T, Rank> cutlass::TensorRef< Storage_, Rank_ >::convert ()
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Storage* cutlass::TensorRef< Storage_, Rank_ >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorRef< Storage_, Rank_ >::good () const
+
+inline
+
+ +
+
+ +

◆ leading_dim()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::TensorRef< Storage_, Rank_ >::leading_dim () const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE long long cutlass::TensorRef< Storage_, Rank_ >::offset (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator+()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator+ (Coord< Rank > const & b) const
+
+inline
+
+ +
+
+ +

◆ operator-()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorRef cutlass::TensorRef< Storage_, Rank_ >::operator- (Coord< Rank > const & b) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator[]() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
Storage& cutlass::TensorRef< Storage_, Rank_ >::operator[] (int idx) const
+
+inline
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorRef< Storage_, Rank_ >::reset (Storage * ptr = nullptr,
Coord< Rank > stride = Coord<Rank>(0) 
)
+
+inline
+
+ +
+
+ +

◆ stride() [1/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord<Rank> const& cutlass::TensorRef< Storage_, Rank_ >::stride () const
+
+inline
+
+ +
+
+ +

◆ stride() [2/2]

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int const& cutlass::TensorRef< Storage_, Rank_ >::stride (int dim) const
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ Rank

+ +
+
+
+template<typename Storage_, int Rank_>
+ + + + + +
+ + + + +
int const cutlass::TensorRef< Storage_, Rank_ >::Rank = Rank_
+
+static
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView-members.html b/docs/generated-html/classcutlass_1_1TensorView-members.html new file mode 100644 index 0000000000..e9401f9cc9 --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorView-members.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::TensorView< T > Member List
+
+
+ +

This is the complete list of members for cutlass::TensorView< T >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
advance(Coord< Rank > const &b)cutlass::TensorRef< T, 4 >inline
at(Coord_t const &coord) constcutlass::TensorView< T >inline
at(Offset_t idx) constcutlass::TensorView< T >inline
Base typedefcutlass::TensorView< T >
const_ref()cutlass::TensorView< T >inline
ConstTensorRef_t typedefcutlass::TensorView< T >
contains(Coord_t const &coord) constcutlass::TensorView< T >inline
convert()cutlass::TensorRef< T, 4 >inline
Coord_t typedefcutlass::TensorView< T >
data() constcutlass::TensorView< T >inline
good() constcutlass::TensorView< T >inline
leading_dim() constcutlass::TensorRef< T, 4 >inline
offset(Coord_t const &coord) constcutlass::TensorView< T >inline
Offset_t typedefcutlass::TensorView< T >
operator+(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator-(Coord< Rank > const &b) constcutlass::TensorRef< T, 4 >inline
operator=(TensorView const &_tensor)cutlass::TensorView< T >inline
operator[](Coord< Rank > const &coord) constcutlass::TensorView< T >inline
TensorRef< T, 4 >::operator[](int idx) constcutlass::TensorRef< T, 4 >inline
Rankcutlass::TensorView< T >static
ref()cutlass::TensorView< T >inline
ref() constcutlass::TensorView< T >inline
reset(TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())cutlass::TensorView< T >inline
TensorRef< T, 4 >::reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))cutlass::TensorRef< T, 4 >inline
size() constcutlass::TensorView< T >inline
size(int dim) constcutlass::TensorView< T >inline
Storage typedefcutlass::TensorRef< T, 4 >
stride() constcutlass::TensorView< T >inline
stride(int dim) constcutlass::TensorView< T >inline
subview(Coord_t const &location, Coord_t size) constcutlass::TensorView< T >inline
TensorRef()cutlass::TensorRef< T, 4 >inline
TensorRef(Storage *ptr, Coord< Rank > stride)cutlass::TensorRef< T, 4 >inline
TensorRef_t typedefcutlass::TensorView< T >
TensorView()cutlass::TensorView< T >inline
TensorView(TensorRef_t const &_ref, Coord_t const &_size)cutlass::TensorView< T >inline
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView.html b/docs/generated-html/classcutlass_1_1TensorView.html new file mode 100644 index 0000000000..7dba23228c --- /dev/null +++ b/docs/generated-html/classcutlass_1_1TensorView.html @@ -0,0 +1,915 @@ + + + + + + + +Cutlass: cutlass::TensorView< T > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::TensorView< T > Class Template Reference
+
+
+ +

Host-side reference implementation of tensor operations. +

+ +

#include <tensor_view.h>

+
+Inheritance diagram for cutlass::TensorView< T >:
+
+
+ + +cutlass::TensorRef< T, 4 > + +
+ + + + + + + + + + + + + + + + + + + + + +

+Public Types

typedef TensorRef< T, 4 > Base
 Reference and stride. More...
 
typedef Base TensorRef_t
 Reference and stride. More...
 
typedef TensorRef< T const, 4 > ConstTensorRef_t
 Reference to constant type. More...
 
typedef int Offset_t
 Type used to compute the offset of an element to the base of a tensor. More...
 
typedef Coord< Rank > Coord_t
 Coordinate into tensor. More...
 
- Public Types inherited from cutlass::TensorRef< T, 4 >
typedef T Storage
 Data type of individual access. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

CUTLASS_HOST_DEVICE TensorView ()
 Default constructor. More...
 
CUTLASS_HOST_DEVICE TensorView (TensorRef_t const &_ref, Coord_t const &_size)
 Constructs a TensorView from a TensorRef and size. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorView is bound to some memory. More...
 
CUTLASS_HOST_DEVICE T * data () const
 Returns a pointer to data. More...
 
CUTLASS_HOST_DEVICE void reset (TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())
 Updates the reference and size of a TensorView object. More...
 
CUTLASS_HOST_DEVICE TensorRef_t & ref ()
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE ConstTensorRef_t const_ref ()
 
CUTLASS_HOST_DEVICE TensorRef_t const & ref () const
 Accesses the tensor reference pointing to data. More...
 
CUTLASS_HOST_DEVICE Coord_t const & size () const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE int size (int dim) const
 Accesses the size. More...
 
CUTLASS_HOST_DEVICE Coord_t const & stride () const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Accesses the stride. More...
 
CUTLASS_HOST_DEVICE TensorView & operator= (TensorView const &_tensor)
 Assigns the TensorView. More...
 
CUTLASS_HOST_DEVICE Offset_t offset (Coord_t const &coord) const
 Returns the index of an element. More...
 
CUTLASS_HOST_DEVICE bool contains (Coord_t const &coord) const
 Determines whether a location is within a tensor. More...
 
CUTLASS_HOST_DEVICE T & at (Coord_t const &coord) const
 Element-wise accessor. More...
 
T & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE T & at (Offset_t idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorView< T > subview (Coord_t const &location, Coord_t size) const
 Returns a TensorView given location and size quantities. More...
 
- Public Member Functions inherited from cutlass::TensorRef< T, 4 >
CUTLASS_HOST_DEVICE TensorRef ()
 Default ctor. More...
 
CUTLASS_HOST_DEVICE TensorRef (Storage *ptr, Coord< Rank > stride)
 Constructs from a pointer and stride. More...
 
CUTLASS_HOST_DEVICE void reset (Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
 Updates the pointer, stride, and location within a TensorRef. More...
 
TensorRef< T, Rank > convert ()
 Conversion function. More...
 
CUTLASS_HOST_DEVICE bool good () const
 Returns true if the TensorRef may be safely accessed. More...
 
CUTLASS_HOST_DEVICE Storage * data () const
 Returns the pointer to referenced data. More...
 
CUTLASS_HOST_DEVICE Coord< Rank > const & stride () const
 Returns the stride of the tensor. More...
 
CUTLASS_HOST_DEVICE int const & stride (int dim) const
 Returns the stride of the tensor in the given dimension. More...
 
CUTLASS_HOST_DEVICE int leading_dim () const
 Returns the maximum stride element as the 'leading dimension'. More...
 
CUTLASS_HOST_DEVICE long long offset (Coord< Rank > const &coord) const
 Computes the offset of an index from the origin of the tensor. More...
 
CUTLASS_HOST_DEVICE Storage & at (Coord< Rank > const &coord) const
 Returns a reference to the element at a given Coord. More...
 
CUTLASS_HOST_DEVICE Storage & at (int idx) const
 Returns a reference to the element at a given index. More...
 
Storage & operator[] (Coord< Rank > const &coord) const
 Element-wise accessor. More...
 
Storage & operator[] (int idx) const
 Element-wise accessor. More...
 
CUTLASS_HOST_DEVICE TensorRef & advance (Coord< Rank > const &b)
 Adds an offset to the pointer. More...
 
CUTLASS_HOST_DEVICE TensorRef operator+ (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
CUTLASS_HOST_DEVICE TensorRef operator- (Coord< Rank > const &b) const
 Returns a TensorRef offset by a given amount. More...
 
+ + + + + + + + +

+Static Public Attributes

static int const Rank = TensorRef_t::Rank
 Rank of tensor. More...
 
- Static Public Attributes inherited from cutlass::TensorRef< T, 4 >
static int const Rank
 Rank of tensor. More...
 
+

Member Typedef Documentation

+ +

◆ Base

+ +
+
+
+template<typename T>
+ + + + +
typedef TensorRef<T, 4> cutlass::TensorView< T >::Base
+
+ +
+
+ +

◆ ConstTensorRef_t

+ +
+
+
+template<typename T>
+ + + + +
typedef TensorRef<T const, 4> cutlass::TensorView< T >::ConstTensorRef_t
+
+ +
+
+ +

◆ Coord_t

+ +
+
+
+template<typename T>
+ + + + +
typedef Coord<Rank> cutlass::TensorView< T >::Coord_t
+
+ +
+
+ +

◆ Offset_t

+ +
+
+
+template<typename T>
+ + + + +
typedef int cutlass::TensorView< T >::Offset_t
+
+ +
+
+ +

◆ TensorRef_t

+ +
+
+
+template<typename T>
+ + + + +
typedef Base cutlass::TensorView< T >::TensorRef_t
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ TensorView() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView ()
+
+inline
+
+ +
+
+ +

◆ TensorView() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE cutlass::TensorView< T >::TensorView (TensorRef_t const & _ref,
Coord_t const & _size 
)
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ at() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ at() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE T& cutlass::TensorView< T >::at (Offset_t idx) const
+
+inline
+
+ +
+
+ +

◆ const_ref()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE ConstTensorRef_t cutlass::TensorView< T >::const_ref ()
+
+inline
+
+ +
+
+ +

◆ contains()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::contains (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ data()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE T* cutlass::TensorView< T >::data () const
+
+inline
+
+ +
+
+ +

◆ good()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE bool cutlass::TensorView< T >::good () const
+
+inline
+
+ +
+
+ +

◆ offset()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE Offset_t cutlass::TensorView< T >::offset (Coord_t const & coord) const
+
+inline
+
+ +
+
+ +

◆ operator=()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE TensorView& cutlass::TensorView< T >::operator= (TensorView< T > const & _tensor)
+
+inline
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
T& cutlass::TensorView< T >::operator[] (Coord< Rank > const & coord) const
+
+inline
+
+ +
+
+ +

◆ ref() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef_t& cutlass::TensorView< T >::ref ()
+
+inline
+
+ +
+
+ +

◆ ref() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE TensorRef_t const& cutlass::TensorView< T >::ref () const
+
+inline
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE void cutlass::TensorView< T >::reset (TensorRef_t const & _ref = TensorRef_t(0),
Coord_t const & _size = Coord_t() 
)
+
+inline
+
+ +
+
+ +

◆ size() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::size () const
+
+inline
+
+ +
+
+ +

◆ size() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int cutlass::TensorView< T >::size (int dim) const
+
+inline
+
+ +
+
+ +

◆ stride() [1/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + +
CUTLASS_HOST_DEVICE Coord_t const& cutlass::TensorView< T >::stride () const
+
+inline
+
+ +
+
+ +

◆ stride() [2/2]

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + +
CUTLASS_HOST_DEVICE int const& cutlass::TensorView< T >::stride (int dim) const
+
+inline
+
+ +
+
+ +

◆ subview()

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
CUTLASS_HOST_DEVICE TensorView<T> cutlass::TensorView< T >::subview (Coord_t const & location,
Coord_t size 
) const
+
+inline
+
+ +
+
+

Member Data Documentation

+ +

◆ Rank

+ +
+
+
+template<typename T>
+ + + + + +
+ + + + +
int const cutlass::TensorView< T >::Rank = TensorRef_t::Rank
+
+static
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classcutlass_1_1TensorView.png b/docs/generated-html/classcutlass_1_1TensorView.png new file mode 100644 index 0000000000000000000000000000000000000000..40500e8a3a854639f0a6a25982cb66d99a343817 GIT binary patch literal 690 zcmeAS@N?(olHy`uVBq!ia0vp^i-0(QgBeJ!EsL`TQW60^A+G=b{|7QZe0{O(Q0W99 z3ycpOI1sV1A`HmoC<*clW&kPzfvcxNj2IZ096eneLn;{G&b`?8Sb@it-#qC5|H<2f z6(pQ8w%=oSdo5`0X?sj2XU?7?nd2L`Tc~8Fvo2Dp6z!PwZ?xiR#i%)+_t>3h zDxT~U-uPkb?I-S=XI(YEWwBhU-QsZAd%@n|_8Sw;oKMbJVrF&Z;@ZSpm!?==k2>Rj z{pAj~HQskv^f}B5y)K0=zrV_Q)pw4aE;*C+I=9Tdu(P+K)-fznMyh7s(QTJ+Z{DHz zh3Vtu%^5tKH)rfJPrl}7b2|8ShR_Y!ts57=wez$0Ogq!9e6mk=pI-jGjnWGHesle~ zm%l~j?IMRMD(jg7C+(?Hk~?0QquWrwn*G4NTk;=2-k4Wde?c=|}$>^~XT%6=NE zY*;K1xu^KO*u^(DZN6?%P7>cBnfCRSfcti~jkkVp)et_hz2m9KebHTWKJ7lTRWxqp zx_#>lJ;UzX%PZen+UBtKR`YG$r07laKId + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass::platform::unique_ptr< T, Deleter > Member List
+
+ + + + + diff --git a/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html new file mode 100644 index 0000000000..cf455f2e5f --- /dev/null +++ b/docs/generated-html/classcutlass_1_1platform_1_1unique__ptr.html @@ -0,0 +1,554 @@ + + + + + + + +Cutlass: cutlass::platform::unique_ptr< T, Deleter > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass::platform::unique_ptr< T, Deleter > Class Template Reference
+
+
+ +

std::unique_ptr +

+ +

#include <platform.h>

+ + + + + + + + +

+Public Types

typedef T * pointer
 
typedef T element_type
 
typedef Deleter deleter_type
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

 unique_ptr ()
 
 unique_ptr (pointer p)
 
 ~unique_ptr ()
 
pointer get () const noexcept
 Returns a pointer to the managed object or nullptr if no object is owned. More...
 
pointer release () noexcept
 Releases ownership of the managed object, if any. More...
 
void reset (pointer p=pointer()) noexcept
 Replaces the managed object, deleting the old object. More...
 
void swap (unique_ptr &other) noexcept
 Swaps the managed objects with *this and another unique_ptr. More...
 
Deleter & get_deleter () noexcept
 Returns the deleter object. More...
 
Deleter const & get_deleter () const noexcept
 Returns the deleter object. More...
 
 operator bool () const noexcept
 Checks whether an object is owned. More...
 
T & operator* () const
 Dereferences the unique_ptr. More...
 
pointer operator-> () const noexcept
 Returns a pointer to the managed object. More...
 
T & operator[] (size_t i) const
 Array access to managed object. More...
 
+

Member Typedef Documentation

+ +

◆ deleter_type

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef Deleter cutlass::platform::unique_ptr< T, Deleter >::deleter_type
+
+ +
+
+ +

◆ element_type

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef T cutlass::platform::unique_ptr< T, Deleter >::element_type
+
+ +
+
+ +

◆ pointer

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + +
typedef T* cutlass::platform::unique_ptr< T, Deleter >::pointer
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ unique_ptr() [1/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::unique_ptr ()
+
+inline
+
+ +
+
+ +

◆ unique_ptr() [2/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::unique_ptr (pointer p)
+
+inline
+
+ +
+
+ +

◆ ~unique_ptr()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::~unique_ptr ()
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::get () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ get_deleter() [1/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter& cutlass::platform::unique_ptr< T, Deleter >::get_deleter ()
+
+inlinenoexcept
+
+ +
+
+ +

◆ get_deleter() [2/2]

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter const& cutlass::platform::unique_ptr< T, Deleter >::get_deleter () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator bool()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
cutlass::platform::unique_ptr< T, Deleter >::operator bool () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
T& cutlass::platform::unique_ptr< T, Deleter >::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator->()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::operator-> () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
T& cutlass::platform::unique_ptr< T, Deleter >::operator[] (size_t i) const
+
+inline
+
+ +
+
+ +

◆ release()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer cutlass::platform::unique_ptr< T, Deleter >::release ()
+
+inlinenoexcept
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
void cutlass::platform::unique_ptr< T, Deleter >::reset (pointer p = pointer())
+
+inlinenoexcept
+
+ +
+
+ +

◆ swap()

+ +
+
+
+template<class T, class Deleter = default_delete<T>>
+ + + + + +
+ + + + + + + + +
void cutlass::platform::unique_ptr< T, Deleter >::swap (unique_ptr< T, Deleter > & other)
+
+inlinenoexcept
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/classes.html b/docs/generated-html/classes.html new file mode 100644 index 0000000000..9896653f61 --- /dev/null +++ b/docs/generated-html/classes.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: Class Index + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class Index
+
+
+
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  a  
+
FragmentMultiplyAdd (cutlass::gemm)   IgemmEpilogueScalar (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   GlobalLoadStreamBase::SharedStorage (cutlass::gemm)   
FragmentMultiplyAdd< half > (cutlass::gemm)   IgemmEpilogueScalar< int > (cutlass::gemm)   log2_down (cutlass)   SimplifiedGemmEpilogueTraits (cutlass::gemm)   
aligned_chunk (cutlass::platform)   FragmentStore (cutlass)   IgemmEpilogueTraits (cutlass::gemm)   log2_down< N, 1, Count > (cutlass)   SimplifiedGemmTraits (cutlass::gemm)   
aligned_storage (cutlass::platform)   FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogueTraitsHelper (cutlass::gemm)   log2_up (cutlass)   SimplifiedGemmTraitsHelper (cutlass::gemm)   
AlignedStruct (cutlass)   FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmFloatToInt8Converter (cutlass::gemm)   log2_up< N, 1, Count > (cutlass)   sqrt_est (cutlass)   
alignment_of (cutlass::platform)   
  g  
+
IgemmGlobalLoadTransformer (cutlass::gemm)   
  m  
+
StorageType (cutlass)   
alignment_of< const value_t > (cutlass::platform)   IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > (cutlass::gemm)   StorageType< 1 > (cutlass)   
alignment_of< const volatile value_t > (cutlass::platform)   Gemm (cutlass::gemm)   IgemmGlobalStoreTransformer (cutlass::gemm)   GemmTraits::MainLoopSharedStorage (cutlass::gemm)   StorageType< 2 > (cutlass)   
alignment_of< double2 > (cutlass::platform)   GemmConfig (cutlass::gemm)   IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > (cutlass::gemm)   MatrixLayout (cutlass)   StorageType< 4 > (cutlass)   
alignment_of< double4 > (cutlass::platform)   GemmDesc (cutlass::gemm)   IgemmInt8ToFloatConverter (cutlass::gemm)   MemorySpace (cutlass)   Store (cutlass)   
alignment_of< float4 > (cutlass::platform)   GemmEpilogue (cutlass::gemm)   IgemmSharedStoreTransformer (cutlass::gemm)   
  n  
+
Store< double, 2, Memory_, true, 16 > (cutlass)   
alignment_of< int4 > (cutlass::platform)   GemmEpilogueTraits (cutlass::gemm)   IgemmSwizzle (cutlass::gemm)   Store< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   
alignment_of< long4 > (cutlass::platform)   GemmEpilogueTraitsHelper (cutlass::gemm)   IgemmTileTraitsHelperA (cutlass::gemm)   nullptr_t (cutlass::platform)   Store< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   
alignment_of< longlong2 > (cutlass::platform)   GemmGlobalIteratorAb (cutlass::gemm)   IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   
  p  
+
Store< Scalar_, Lanes_, Memory_, true, 8 > (cutlass)   
alignment_of< longlong4 > (cutlass::platform)   GemmGlobalIteratorCd (cutlass::gemm)   IgemmTileTraitsHelperB (cutlass::gemm)   GemmTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< uint4 > (cutlass::platform)   GemmGlobalTileCdTraits (cutlass::gemm)   IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   alignment_of::pad (cutlass::platform)   GemmEpilogueTraits::StreamSharedStorage (cutlass::gemm)   
alignment_of< ulong4 > (cutlass::platform)   GemmGlobalTileTraits (cutlass::gemm)   IgemmTraits (cutlass::gemm)   WmmaGemmGlobalIteratorCd::Params (cutlass::gemm)   
  t  
+
alignment_of< ulonglong2 > (cutlass::platform)   GemmMultiplicandTraits (cutlass::gemm)   IgemmTraitsHelper (cutlass::gemm)   GemmTraits::Params (cutlass::gemm)   
alignment_of< ulonglong4 > (cutlass::platform)   GemmOperand (cutlass)   IgemmTransformerA (cutlass::gemm)   GlobalLoadStreamBase::Params (cutlass::gemm)   TensorRef (cutlass)   
alignment_of< volatile value_t > (cutlass::platform)   GemmOperandTraitsAb (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileIteratorBase::Params (cutlass)   TensorView (cutlass)   
  b  
+
GemmSharedLoadTileATraits (cutlass::gemm)   IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmGlobalIteratorCd::Params (cutlass::gemm)   ThreadMultiplyAdd (cutlass::gemm)   
GemmSharedLoadTileBTraits (cutlass::gemm)   IgemmTransformerB (cutlass::gemm)   TileLoadIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > (cutlass::gemm)   
bool_constant (cutlass::platform)   GemmSharedLoadTileDTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   TileStoreIterator::Params (cutlass)   ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > (cutlass::gemm)   
  c  
+
GemmSharedStoreTileAbTraits (cutlass::gemm)   IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   GemmEpilogueTraits::Params (cutlass::gemm)   GemmSharedLoadTileBTraits::ThreadOffset (cutlass::gemm)   
GemmSharedStoreTileDTraits (cutlass::gemm)   integral_constant (cutlass::platform)   Gemm::Params (cutlass::gemm)   GemmGlobalTileCdTraits::ThreadOffset (cutlass::gemm)   
ClearAccumulators (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits (cutlass::gemm)   is_arithmetic (cutlass::platform)   SharedLoadStream::Params (cutlass::gemm)   IgemmContiguousGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape (cutlass)   GemmTileTraitsHelperA (cutlass::gemm)   is_base_of (cutlass::platform)   LinearScaling::Params (cutlass::gemm)   GemmGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_base_of_helper (cutlass::platform)   GemmGlobalIteratorAb::Params (cutlass::gemm)   GemmSharedLoadTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > (cutlass)   GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_floating_point (cutlass::platform)   plus (cutlass::platform)   GemmSharedLoadTileATraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides (cutlass)   GemmTileTraitsHelperB (cutlass::gemm)   is_fundamental (cutlass::platform)   PredicateTileAdapter (cutlass)   GemmSharedStoreTileDTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_integral (cutlass::platform)   PredicateVector (cutlass)   HgemmCrosswiseGlobalTileTraits::ThreadOffset (cutlass::gemm)   
ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_integral< char > (cutlass::platform)   ProjectOperand (cutlass::gemm)   GemmSharedStoreTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides (cutlass)   GemmTraits (cutlass::gemm)   is_integral< const T > (cutlass::platform)   ProjectOperand< GemmOperand::kA, Kstrided > (cutlass::gemm)   TileTraitsWarpRake::ThreadOffset (cutlass)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > (cutlass)   GetExtent (cutlass::gemm)   is_integral< const volatile T > (cutlass::platform)   ProjectOperand< GemmOperand::kB, Kstrided > (cutlass::gemm)   GemmSharedStoreWithSkewTileAbTraits::ThreadOffset (cutlass::gemm)   
ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > (cutlass)   GetExtent< GemmOperand::kA, Tile_ > (cutlass::gemm)   is_integral< int > (cutlass::platform)   ProjectOperand< GemmOperand::kC, true > (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits::ThreadOffset (cutlass::gemm)   
conditional (cutlass::platform)   GetExtent< GemmOperand::kB, Tile_ > (cutlass::gemm)   is_integral< long > (cutlass::platform)   ProjectOperand< GemmOperand::kD, true > (cutlass::gemm)   TiledThreadOffset (cutlass)   
conditional< false, T, F > (cutlass::platform)   GemmTraits::GlobalLoadStream (cutlass::gemm)   is_integral< long long > (cutlass::platform)   
  r  
+
TileIteratorBase (cutlass)   
PredicateVector::ConstIterator (cutlass)   GlobalLoadStream (cutlass::gemm)   is_integral< short > (cutlass::platform)   TileLoadIterator (cutlass)   
ConstPredicateTileAdapter (cutlass)   GlobalLoadStreamBase (cutlass::gemm)   is_integral< signed char > (cutlass::platform)   remove_const (cutlass::platform)   TileStoreIterator (cutlass)   
Convert (cutlass)   greater (cutlass::platform)   is_integral< unsigned char > (cutlass::platform)   remove_const< const T > (cutlass::platform)   TileTraits (cutlass)   
Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > (cutlass)   
  h  
+
is_integral< unsigned int > (cutlass::platform)   remove_cv (cutlass::platform)   TileTraitsContiguousMajor (cutlass)   
Coord (cutlass)   is_integral< unsigned long > (cutlass::platform)   remove_volatile (cutlass::platform)   TileTraitsStandard (cutlass)   
Copy (cutlass)   HgemmConfig (cutlass::gemm)   is_integral< unsigned long long > (cutlass::platform)   remove_volatile< volatile T > (cutlass::platform)   TileTraitsStrideMajor (cutlass)   
  d  
+
HgemmCrosswiseGlobalTileTraits (cutlass::gemm)   is_integral< unsigned short > (cutlass::platform)   ReshapeThreads (cutlass::gemm)   TileTraitsWarpRake (cutlass)   
HgemmSwizzle (cutlass::gemm)   is_integral< volatile T > (cutlass::platform)   ReshapeThreads< Tile_, Threads_, true > (cutlass::gemm)   PredicateVector::TrivialIterator (cutlass)   
default_delete (cutlass::platform)   HgemmTileTraitsHelperA (cutlass::gemm)   is_pointer (cutlass::platform)   ReshapeTile (cutlass)   TrivialPredicateTileAdapter (cutlass)   
default_delete< T[]> (cutlass::platform)   HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > (cutlass::gemm)   is_pointer_helper (cutlass::platform)   ReshapeTile< Tile_, kAccessSize_, true > (cutlass)   
  u  
+
DgemmConfig (cutlass::gemm)   HgemmTileTraitsHelperB (cutlass::gemm)   is_pointer_helper< T * > (cutlass::platform)   
  s  
+
DgemmTraits (cutlass::gemm)   HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > (cutlass::gemm)   is_pow2 (cutlass)   unique_ptr (cutlass::platform)   
divide_assert (cutlass)   HgemmTraits (cutlass::gemm)   is_same (cutlass::platform)   SgemmConfig (cutlass::gemm)   
  v  
+
is_base_of_helper::dummy (cutlass::platform)   HgemmTraitsHelper (cutlass::gemm)   is_same< A, A > (cutlass::platform)   SgemmTraits (cutlass::gemm)   
  e  
+
HgemmTransformerA (cutlass::gemm)   is_trivially_copyable (cutlass::platform)   Shape (cutlass)   Vector (cutlass)   
HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   is_void (cutlass::platform)   ShapeAdd (cutlass)   Vector< half, kLanes_ > (cutlass)   
enable_if (cutlass::platform)   HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   is_volatile (cutlass::platform)   ShapeCount (cutlass)   Vectorize (cutlass)   
enable_if< false, T > (cutlass::platform)   HgemmTransformerB (cutlass::gemm)   is_volatile< volatile T > (cutlass::platform)   ShapeDiv (cutlass)   Vectorize< Element_, 1 > (cutlass)   
Extent (cutlass)   HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > (cutlass::gemm)   PredicateVector::Iterator (cutlass)   ShapeMax (cutlass)   VectorTraits (cutlass)   
Extent< Vector< T, Lanes > > (cutlass)   HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > (cutlass::gemm)   IteratorAdvance (cutlass)   ShapeMin (cutlass)   VectorTraits< Vector< T, Lanes > > (cutlass)   
Extent< Vector< T, Lanes > const > (cutlass)   
  i  
+
IteratorFragment (cutlass)   ShapeMul (cutlass)   VectorTraits< Vector< T, Lanes > const > (cutlass)   
  f  
+
  l  
+
ShapeScale (cutlass)   
  w  
+
Identity (cutlass)   ShapeStrides (cutlass)   
Fragment (cutlass)   IdentityBlockSwizzle (cutlass::gemm)   less (cutlass::platform)   ShapeSub (cutlass)   WmmaGemmGlobalIteratorCd (cutlass::gemm)   
FragmentConstIterator (cutlass)   IgemmConfig (cutlass::gemm)   LinearScaling (cutlass::gemm)   GemmTraits::SharedLoadStream (cutlass::gemm)   WmmaGemmGlobalIteratorCdTraits (cutlass::gemm)   
FragmentIterator (cutlass)   IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > (cutlass::gemm)   Load (cutlass)   SharedLoadStream (cutlass::gemm)   
FragmentLoad (cutlass)   IgemmContiguousGlobalTileTraits (cutlass::gemm)   Load< double, 2, Memory_, true, 16 > (cutlass)   ClearAccumulators::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 16 > (cutlass)   GemmEpilogueTraits::SharedStorage (cutlass::gemm)   
FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > (cutlass)   IgemmEpilogue< GemmEpilogueTraits_, true > (cutlass::gemm)   Load< Scalar_, Lanes_, Memory_, true, 4 > (cutlass)   GemmTraits::SharedStorage (cutlass::gemm)   
+
a | b | c | d | e | f | g | h | i | l | m | n | p | r | s | t | u | v | w
+
+ + + + diff --git a/docs/generated-html/classnv__std_1_1unique__ptr-members.html b/docs/generated-html/classnv__std_1_1unique__ptr-members.html new file mode 100644 index 0000000000..5c9df4c5a4 --- /dev/null +++ b/docs/generated-html/classnv__std_1_1unique__ptr-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
nv_std::unique_ptr< T, Deleter > Member List
+
+ + + + + diff --git a/docs/generated-html/classnv__std_1_1unique__ptr.html b/docs/generated-html/classnv__std_1_1unique__ptr.html new file mode 100644 index 0000000000..9abd8886bc --- /dev/null +++ b/docs/generated-html/classnv__std_1_1unique__ptr.html @@ -0,0 +1,554 @@ + + + + + + + +Cutlass: nv_std::unique_ptr< T, Deleter > Class Template Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
nv_std::unique_ptr< T, Deleter > Class Template Reference
+
+
+ +

std::unique_ptr +

+ +

#include <nv_std.h>

+ + + + + + + + +

+Public Types

typedef T * pointer
 
typedef T element_type
 
typedef Deleter deleter_type
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Public Member Functions

 unique_ptr ()
 
 unique_ptr (pointer p)
 
 ~unique_ptr ()
 
pointer get () const noexcept
 Returns a pointer to the managed object or nullptr if no object is owned. More...
 
pointer release () noexcept
 Releases ownership of the managed object, if any. More...
 
void reset (pointer p=pointer()) noexcept
 Replaces the managed object, deleting the old object. More...
 
void swap (unique_ptr &other) noexcept
 Swaps the managed objects with *this and another unique_ptr. More...
 
Deleter & get_deleter () noexcept
 Returns the deleter object. More...
 
Deleter const & get_deleter () const noexcept
 Returns the deleter object. More...
 
 operator bool () const noexcept
 Checks whether an object is owned. More...
 
T & operator* () const
 Dereferences the unique_ptr. More...
 
pointer operator-> () const noexcept
 Returns a pointer to the managed object. More...
 
T & operator[] (size_t i) const
 Array access to managed object. More...
 
+

Member Typedef Documentation

+ +

◆ deleter_type

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef Deleter nv_std::unique_ptr< T, Deleter >::deleter_type
+
+ +
+
+ +

◆ element_type

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef T nv_std::unique_ptr< T, Deleter >::element_type
+
+ +
+
+ +

◆ pointer

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + +
typedef T* nv_std::unique_ptr< T, Deleter >::pointer
+
+ +
+
+

Constructor & Destructor Documentation

+ +

◆ unique_ptr() [1/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::unique_ptr ()
+
+inline
+
+ +
+
+ +

◆ unique_ptr() [2/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
nv_std::unique_ptr< T, Deleter >::unique_ptr (pointer p)
+
+inline
+
+ +
+
+ +

◆ ~unique_ptr()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::~unique_ptr ()
+
+inline
+
+ +
+
+

Member Function Documentation

+ +

◆ get()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::get () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ get_deleter() [1/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter& nv_std::unique_ptr< T, Deleter >::get_deleter ()
+
+inlinenoexcept
+
+ +
+
+ +

◆ get_deleter() [2/2]

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
Deleter const& nv_std::unique_ptr< T, Deleter >::get_deleter () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator bool()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
nv_std::unique_ptr< T, Deleter >::operator bool () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator*()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
T& nv_std::unique_ptr< T, Deleter >::operator* () const
+
+inline
+
+ +
+
+ +

◆ operator->()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::operator-> () const
+
+inlinenoexcept
+
+ +
+
+ +

◆ operator[]()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
T& nv_std::unique_ptr< T, Deleter >::operator[] (size_t i) const
+
+inline
+
+ +
+
+ +

◆ release()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + +
pointer nv_std::unique_ptr< T, Deleter >::release ()
+
+inlinenoexcept
+
+ +
+
+ +

◆ reset()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
void nv_std::unique_ptr< T, Deleter >::reset (pointer p = pointer())
+
+inlinenoexcept
+
+ +
+
+ +

◆ swap()

+ +
+
+
+template<class T, class Deleter = nv_std::default_delete<T>>
+ + + + + +
+ + + + + + + + +
void nv_std::unique_ptr< T, Deleter >::swap (unique_ptr< T, Deleter > & other)
+
+inlinenoexcept
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + diff --git a/docs/generated-html/clear__accumulators_8h.html b/docs/generated-html/clear__accumulators_8h.html new file mode 100644 index 0000000000..b4bd3b39c2 --- /dev/null +++ b/docs/generated-html/clear__accumulators_8h.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: clear_accumulators.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
clear_accumulators.h File Reference
+
+
+ +

Defines abstractions for efficiently clearing accumulator tiles. +More...

+
#include <cutlass/vector.h>
+
+

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
 
struct  cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage
 The shared storage. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/clear__accumulators_8h_source.html b/docs/generated-html/clear__accumulators_8h_source.html new file mode 100644 index 0000000000..1a6f517fb5 --- /dev/null +++ b/docs/generated-html/clear__accumulators_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: clear_accumulators.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
clear_accumulators.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, int kLanes_ = 1>
40  struct SharedStorage {};
41 
43  CUTLASS_DEVICE ClearAccumulators(SharedStorage& shared_storage) {}
44 
46  template <typename Fragment_>
47  CUTLASS_DEVICE void clear(Fragment_& fragment) {
48  fragment.clear();
49  }
50 };
51 
53 
54 } // namespace gemm
55 } // namespace cutlass
Definition: convert.h:33
+
Definition: clear_accumulators.h:38
+
CUTLASS_DEVICE ClearAccumulators(SharedStorage &shared_storage)
Ctor.
Definition: clear_accumulators.h:43
+
Defines a 1D vector of elements held in the registers of each thread.
+
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:47
+
The shared storage.
Definition: clear_accumulators.h:40
+
+ + + + diff --git a/docs/generated-html/closed.png b/docs/generated-html/closed.png new file mode 100644 index 0000000000000000000000000000000000000000..f820ec9ca6a972fb78ff2d2951418304cdcae4a2 GIT binary patch literal 133 zcmeAS@N?(olHy`uVBq!ia0vp^oFL4>1|%O$WD@{VKAtX)Ar*{o=LYgRDDp7#=P0u1 zb5z6$H0ho2h})!VAY$_NdS0|D!;CK%)e`0~*4WHoNanrr>Fv*tlj1q_Tjy?hlFDIg g{r71`Wc1I=adboFyt=akR{0Li8)Q~&?~ literal 0 HcmV?d00001 diff --git a/docs/generated-html/convert_8h.html b/docs/generated-html/convert_8h.html new file mode 100644 index 0000000000..422c520173 --- /dev/null +++ b/docs/generated-html/convert_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: convert.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
convert.h File Reference
+
+
+ +

Defines conversion operations among Fragments of different base type. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::Convert< InputFragment_, OutputFragment_ >
 
struct  cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 
struct  cutlass::Copy< Fragment_ >
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/convert_8h_source.html b/docs/generated-html/convert_8h_source.html new file mode 100644 index 0000000000..6e877d293e --- /dev/null +++ b/docs/generated-html/convert_8h_source.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: convert.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
convert.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
33 namespace cutlass {
34 
36 
37 template <typename InputFragment_, typename OutputFragment_>
38 struct Convert {};
39 
41 
42 template <typename InputScalar_, typename OutputScalar_, int kScalars_>
43 struct Convert<Fragment<InputScalar_, kScalars_>, Fragment<OutputScalar_, kScalars_> > {
48 
50  CUTLASS_DEVICE Convert() {}
51 
53  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
54  transform(src, 0, dst);
55  }
56 
58  template <typename Fragment_>
59  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
60  for (int i = 0; i < kScalars_; ++i) {
61  dst[i] = static_cast<OutputScalar_>(src[i + offset]);
62  }
63  }
64 };
65 
67 
68 template <typename Fragment_>
69 struct Copy {
71  typedef Fragment_ InputFragment;
73  typedef Fragment_ OutputFragment;
74 
76  CUTLASS_DEVICE Copy() {}
77 
79  CUTLASS_DEVICE void transform(Fragment_ const& src, Fragment_& dst) { transform(src, 0, dst); }
80 
82  template <typename InputFragment_>
83  CUTLASS_DEVICE void transform(InputFragment_ const& src, int offset, Fragment_& dst) {
84  if (sizeof(typename Fragment_::Element) == 8) {
85  uint64_t const* src_ptr = reinterpret_cast<uint64_t const*>(&src[offset]);
86  uint64_t* dst_ptr = reinterpret_cast<uint64_t*>(&dst[0]);
87  for (int i = 0; i < sizeof(Fragment_) / 8; ++i) {
88  dst_ptr[i] = src_ptr[i];
89  }
90  } else {
91  uint32_t const* src_ptr = reinterpret_cast<uint32_t const*>(&src[offset]);
92  uint32_t* dst_ptr = reinterpret_cast<uint32_t*>(&dst[0]);
93  for (int i = 0; i < sizeof(Fragment_) / 4; ++i) {
94  dst_ptr[i] = src_ptr[i];
95  }
96  }
97  }
98 };
99 
101 
102 } // namespace cutlass
Definition: convert.h:33
+
Fragment< OutputScalar_, kScalars_ > OutputFragment
The output fragment.
Definition: convert.h:47
+
Definition: convert.h:69
+
CUTLASS_DEVICE void transform(Fragment_ const &src, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:79
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
CUTLASS_DEVICE void transform(InputFragment_ const &src, int offset, Fragment_ &dst)
Transform a fragment.
Definition: convert.h:83
+ +
Fragment_ InputFragment
The input fragment.
Definition: convert.h:71
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: convert.h:53
+
CUTLASS_DEVICE Copy()
Ctor.
Definition: convert.h:76
+
Fragment_ OutputFragment
The output fragment.
Definition: convert.h:73
+
Fragment< InputScalar_, kScalars_ > InputFragment
The input fragment.
Definition: convert.h:45
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: convert.h:59
+
Definition: convert.h:38
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
+ + + + diff --git a/docs/generated-html/coord_8h.html b/docs/generated-html/coord_8h.html new file mode 100644 index 0000000000..5165038675 --- /dev/null +++ b/docs/generated-html/coord_8h.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: coord.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
coord.h File Reference
+
+
+ +

A Coord is a coordinate of arbitrary rank into a tensor or matrix. +More...

+
#include <cutlass/cutlass.h>
+
+

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::Identity
 Describes identity elements. More...
 
struct  cutlass::Coord< N_ >
 Statically-sized array specifying Coords within a tensor. More...
 
+ + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

CUTLASS_HOST_DEVICE Coord< 1 > cutlass::make_Coord (int _0)
 Helper to make a 1-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::make_Coord (int _0, int _1)
 Helper to make a 2-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::make_Coord (int _0, int _1, int _2)
 Helper to make a 3-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 4 > cutlass::make_Coord (int _0, int _1, int _2, int _3)
 Helper to make a 4-element coordinate. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 3 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 2 > cutlass::get_Coord_hw (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_hwc (Coord< 4 > const &coord)
 Getter. More...
 
CUTLASS_HOST_DEVICE Coord< 3 > cutlass::get_Coord_dhw (Coord< 4 > const &coord)
 Getter. More...
 
+
+ + + + diff --git a/docs/generated-html/coord_8h_source.html b/docs/generated-html/coord_8h_source.html new file mode 100644 index 0000000000..71ec92e1af --- /dev/null +++ b/docs/generated-html/coord_8h_source.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: coord.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
coord.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/cutlass.h>
32 
33 namespace cutlass {
34 
36 
38 struct Identity {
41  enum Kind { Additive = 0, Multiplicative = 1 };
42 };
43 
45 
47 template <int N_>
48 struct Coord {
49  //
50  // Type and constant definitions
51  //
52 
53  static int const N = N_;
54 
55  //
56  // Data members
57  //
58 
60  int idx[N];
61 
62  //
63  // Methods
64  //
65 
68  Coord(int value = 0) {
69  for (int i = 0; i < N; ++i) {
70  idx[i] = value;
71  }
72  }
73 
76  Coord(int _idx[]) {
77  for (int i = 0; i < N; ++i) {
78  idx[i] = _idx[i];
79  }
80  }
81 
84  Coord operator+(Coord const& b) const {
85  Coord c;
86  for (int i = 0; i < N; ++i) {
87  c.idx[i] = idx[i] + b.idx[i];
88  }
89  return c;
90  }
91 
94  Coord operator-(Coord const& b) const {
95  Coord c;
96  for (int i = 0; i < N; ++i) {
97  c.idx[i] = idx[i] - b.idx[i];
98  }
99  return c;
100  }
101 
104  Coord operator*(Coord const& b) const {
105  Coord c;
106  for (int i = 0; i < N; ++i) {
107  c.idx[i] = idx[i] * b.idx[i];
108  }
109  return c;
110  }
111 
114  Coord operator/(Coord const& b) const {
115  Coord c;
116  for (int i = 0; i < N; ++i) {
117  c.idx[i] = idx[i] / b.idx[i];
118  }
119  return c;
120  }
121 
124  Coord& operator+=(Coord const& b) {
125  for (int i = 0; i < N; ++i) {
126  idx[i] += b.idx[i];
127  }
128  return *this;
129  }
130 
133  Coord& operator-=(Coord const& b) {
134  for (int i = 0; i < N; ++i) {
135  idx[i] -= b.idx[i];
136  }
137  return *this;
138  }
139 
142  Coord& operator*=(Coord const& b) {
143  for (int i = 0; i < N; ++i) {
144  idx[i] *= b.idx[i];
145  }
146  return *this;
147  }
148 
151  Coord& operator/=(Coord const& b) {
152  for (int i = 0; i < N; ++i) {
153  idx[i] /= b.idx[i];
154  }
155  return *this;
156  }
157 
159  CUTLASS_HOST_DEVICE int& operator[](int dim) { return idx[dim]; }
160 
162  CUTLASS_HOST_DEVICE int const& operator[](int dim) const { return idx[dim]; }
163 
165  template <typename T>
166  CUTLASS_HOST_DEVICE T dot(Coord const& b, T sum) const {
167  for (int i = 0; i < N; ++i) {
168  sum += idx[i] * b.idx[i];
169  }
170  return sum;
171  }
172 
174  template <typename T>
175  CUTLASS_HOST_DEVICE T dot(Coord const& b) const {
176  T sum = T(0);
177  for (int i = 0; i < N; ++i) {
178  sum += idx[i] * b.idx[i];
179  }
180  return sum;
181  }
182 
184  template <int Dim>
186  return idx[Dim];
187  }
188 
191  int& at(int dim) { return idx[dim]; }
192 
194  template <int Dim>
195  CUTLASS_HOST_DEVICE int const& at() const {
196  return idx[Dim];
197  }
198 
201  int const& at(int dim) const { return idx[dim]; }
202 
205  bool operator==(Coord<N> const& b) const {
206  bool equal = true;
207  for (int i = 0; equal && i < N; ++i) {
208  equal = (idx[i] == b.idx[i]);
209  }
210  return equal;
211  }
212 
215  bool operator!=(Coord<N> const& b) const { return !(*this == b); }
216 
219  Coord& clamp(Coord<N> const& max, Coord<N> const& min = Coord<N>()) {
220  for (int i = 0; i < N; ++i) {
221  idx[i] = __NV_STD_MAX(__NV_STD_MIN(idx[i], max.idx[i]), min.idx[i]);
222  }
223  return *this;
224  }
225 
228  int count() const {
229  int product = idx[0];
230  for (int i = 1; i < N; ++i) {
231  product *= idx[i];
232  }
233  return product;
234  }
235 };
236 
238 
242  int values[1] = {_0};
243  return Coord<1>(values);
244 }
245 
248 Coord<2> make_Coord(int _0, int _1) {
249  int values[2] = {_0, _1};
250  return Coord<2>(values);
251 }
252 
255 Coord<3> make_Coord(int _0, int _1, int _2) {
256  int values[3] = {_0, _1, _2};
257  return Coord<3>(values);
258 }
259 
262 Coord<4> make_Coord(int _0, int _1, int _2, int _3) {
263  int values[4] = {_0, _1, _2, _3};
264  return Coord<4>(values);
265 }
266 
268 
271 Coord<2> get_Coord_hw(Coord<3> const& coord) { return make_Coord(coord[1], coord[2]); }
272 
275 Coord<2> get_Coord_hw(Coord<4> const& coord) { return make_Coord(coord[1], coord[2]); }
276 
279 Coord<3> get_Coord_hwc(Coord<4> const& coord) { return make_Coord(coord[1], coord[2], coord[3]); }
280 
283 Coord<3> get_Coord_dhw(Coord<4> const& coord) { return make_Coord(coord[0], coord[1], coord[2]); }
284 
286 
287 } // namespace cutlass
CUTLASS_HOST_DEVICE int const & operator[](int dim) const
Member access operator.
Definition: coord.h:162
+
CUTLASS_HOST_DEVICE int count() const
Returns the product of all elements.
Definition: coord.h:228
+
Describes identity elements.
Definition: coord.h:38
+
CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
std::max
Definition: platform.h:207
+
Definition: convert.h:33
+
CUTLASS_HOST_DEVICE bool operator==(Coord< N > const &b) const
Determines if two Coord<> objects are equal.
Definition: coord.h:205
+
CUTLASS_HOST_DEVICE Coord & operator+=(Coord const &b)
In-place addition.
Definition: coord.h:124
+
CUTLASS_HOST_DEVICE bool operator!=(Coord< N > const &b) const
Not equal.
Definition: coord.h:215
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc(Coord< 4 > const &coord)
Getter.
Definition: coord.h:279
+
CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw(Coord< 4 > const &coord)
Getter.
Definition: coord.h:283
+
CUTLASS_HOST_DEVICE Coord & clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())
Clamps a coordinate to a range specified by maximum and minimum values.
Definition: coord.h:219
+
CUTLASS_HOST_DEVICE int const & at() const
Gets the index of a given Coord element.
Definition: coord.h:195
+
CUTLASS_HOST_DEVICE Coord operator/(Coord const &b) const
Element-wise division.
Definition: coord.h:114
+
Kind
Definition: coord.h:41
+
CUTLASS_HOST_DEVICE T dot(Coord const &b, T sum) const
Computes the dot product of two Coord instances.
Definition: coord.h:166
+
CUTLASS_HOST_DEVICE Coord(int _idx[])
Constructs from an array of integers.
Definition: coord.h:76
+
#define __NV_STD_MAX(a, b)
Select maximum(a, b)
Definition: platform.h:155
+
CUTLASS_HOST_DEVICE int & at(int dim)
Access via index; may limit unrolling potential.
Definition: coord.h:191
+
CUTLASS_HOST_DEVICE int & operator[](int dim)
Member access operator.
Definition: coord.h:159
+
CUTLASS_HOST_DEVICE Coord & operator-=(Coord const &b)
In-place subtraction.
Definition: coord.h:133
+
CUTLASS_HOST_DEVICE Coord operator*(Coord const &b) const
Element-wise multiplication.
Definition: coord.h:104
+
CUTLASS_HOST_DEVICE Coord(int value=0)
Default ctor initializes uniformly.
Definition: coord.h:68
+
CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw(Coord< 3 > const &coord)
Getter.
Definition: coord.h:271
+
static int const N
Definition: coord.h:53
+
#define __NV_STD_MIN(a, b)
Select minimum(a, b)
Definition: platform.h:160
+
CUTLASS_HOST_DEVICE T dot(Coord const &b) const
Computes the dot product of two Coord instances.
Definition: coord.h:175
+
CUTLASS_HOST_DEVICE Coord operator-(Coord const &b) const
Element-wise subtraction.
Definition: coord.h:94
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
std::min
Definition: platform.h:201
+
Definition: coord.h:41
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE int & at()
Gets the index of a given Coord element.
Definition: coord.h:185
+
int idx[N]
Indices.
Definition: coord.h:60
+
Definition: coord.h:41
+
CUTLASS_HOST_DEVICE int const & at(int dim) const
Access via index; may limit unrolling potential.
Definition: coord.h:201
+
Basic include for CUTLASS macros.
+
CUTLASS_HOST_DEVICE Coord & operator*=(Coord const &b)
In-place multiplication.
Definition: coord.h:142
+
CUTLASS_HOST_DEVICE Coord operator+(Coord const &b) const
Element-wise addition.
Definition: coord.h:84
+
CUTLASS_HOST_DEVICE Coord & operator/=(Coord const &b)
In-place division.
Definition: coord.h:151
+
+ + + + diff --git a/docs/generated-html/core__io_8h.html b/docs/generated-html/core__io_8h.html new file mode 100644 index 0000000000..d71c397167 --- /dev/null +++ b/docs/generated-html/core__io_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: core_io.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
core_io.h File Reference
+
+
+ +

Helpers for printing cutlass/core objects. +More...

+
#include <iosfwd>
+#include <typeinfo>
+#include <cutlass/coord.h>
+
+

Go to the source code of this file.

+ + + + + +

+Functions

template<int Rank>
std::ostream & operator<< (std::ostream &out, cutlass::Coord< Rank > const &coord)
 
+

Function Documentation

+ +

◆ operator<<()

+ +
+
+
+template<int Rank>
+ + + + + + + + + + + + + + + + + + +
std::ostream& operator<< (std::ostream & out,
cutlass::Coord< Rank > const & coord 
)
+
+ +
+
+
+ + + + diff --git a/docs/generated-html/core__io_8h_source.html b/docs/generated-html/core__io_8h_source.html new file mode 100644 index 0000000000..7c076c94da --- /dev/null +++ b/docs/generated-html/core__io_8h_source.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: core_io.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
core_io.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 #pragma once
26 
31 #pragma once
32 
33 #include <iosfwd>
34 #include <typeinfo>
35 
36 #include <cutlass/coord.h>
37 
38 template <int Rank>
39 std::ostream& operator<<(std::ostream& out, cutlass::Coord<Rank> const& coord) {
40  for (int i = 0; i < Rank; ++i) {
41  out << (i ? ", " : "") << coord.idx[i];
42  }
43  return out;
44 }
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
+ + + + diff --git a/docs/generated-html/cutlass_8h.html b/docs/generated-html/cutlass_8h.html new file mode 100644 index 0000000000..bbb0463c91 --- /dev/null +++ b/docs/generated-html/cutlass_8h.html @@ -0,0 +1,237 @@ + + + + + + + +Cutlass: cutlass.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass.h File Reference
+
+
+ +

Basic include for CUTLASS macros. +More...

+ +

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + +

+Macros

#define CUTLASS_MAJOR   1
 
#define CUTLASS_MINOR   0
 
#define CUTLASS_PATCH   0
 
#define CUTLASS_VERSION   ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
 
#define CUTLASS_HOST_DEVICE
 
#define CUTLASS_PRAGMA_UNROLL
 
#define CUTLASS_PRAGMA_NO_UNROLL
 
#define CUTLASS_ASSERT(x)   assert(x)
 
+

Macro Definition Documentation

+ +

◆ CUTLASS_ASSERT

+ +
+
+ + + + + + + + +
#define CUTLASS_ASSERT( x)   assert(x)
+
+ +
+
+ +

◆ CUTLASS_HOST_DEVICE

+ +
+
+ + + + +
#define CUTLASS_HOST_DEVICE
+
+ +
+
+ +

◆ CUTLASS_MAJOR

+ +
+
+ + + + +
#define CUTLASS_MAJOR   1
+
+ +
+
+ +

◆ CUTLASS_MINOR

+ +
+
+ + + + +
#define CUTLASS_MINOR   0
+
+ +
+
+ +

◆ CUTLASS_PATCH

+ +
+
+ + + + +
#define CUTLASS_PATCH   0
+
+ +
+
+ +

◆ CUTLASS_PRAGMA_NO_UNROLL

+ +
+
+ + + + +
#define CUTLASS_PRAGMA_NO_UNROLL
+
+ +
+
+ +

◆ CUTLASS_PRAGMA_UNROLL

+ +
+
+ + + + +
#define CUTLASS_PRAGMA_UNROLL
+
+ +
+
+ +

◆ CUTLASS_VERSION

+ +
+
+ + + + +
#define CUTLASS_VERSION   ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
+
+ +
+
+
+ + + + diff --git a/docs/generated-html/cutlass_8h_source.html b/docs/generated-html/cutlass_8h_source.html new file mode 100644 index 0000000000..d2f442295e --- /dev/null +++ b/docs/generated-html/cutlass_8h_source.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: cutlass.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
30 #pragma once
31 
33 
34 #define CUTLASS_MAJOR 1
35 #define CUTLASS_MINOR 0
36 #define CUTLASS_PATCH 0
37 #define CUTLASS_VERSION ((CUTLASS_MAJOR)*100 + (CUTLASS_MINOR)*10 + CUTLASS_PATCH)
38 
39 #ifdef __NVCC__
40 #define CUTLASS_HOST_DEVICE __forceinline__ __device__ __host__
41 #define CUTLASS_DEVICE __forceinline__ __device__
42 #elif defined(__CUDACC_RTC__)
43 #define CUTLASS_HOST_DEVICE __forceinline__ __device__
44 #define CUTLASS_DEVICE __forceinline__ __device__
45 #else
46 #define CUTLASS_HOST_DEVICE
47 // CUTLASS_DEVICE is an error if not compiling device code
48 #endif
49 
50 // CUTLASS_PRAGMA_UNROLL inserts an unroll pragma ("#pragma unroll") if supported by the compiler
51 #if defined(__CUDA_ARCH__)
52 #if defined(_MSC_VER)
53 #define CUTLASS_PRAGMA_UNROLL __pragma("unroll")
54 #define CUTLASS_PRAGMA_NO_UNROLL __pragma("unroll 1")
55 #else
56 #define CUTLASS_PRAGMA_UNROLL _Pragma("unroll")
57 #define CUTLASS_PRAGMA_NO_UNROLL _Pragma("unroll 1")
58 #endif
59 #else
60 #define CUTLASS_PRAGMA_UNROLL
61 #define CUTLASS_PRAGMA_NO_UNROLL
62 #endif
63 
64 #define CUTLASS_ASSERT(x) assert(x)
65 
66 namespace cutlass {
67 
69 static const int kWarpSize = 32;
70 
71 } // namespace cutlass
72 
Definition: convert.h:33
+
+ + + + diff --git a/docs/generated-html/cutlass__math_8h.html b/docs/generated-html/cutlass__math_8h.html new file mode 100644 index 0000000000..953b0d4c70 --- /dev/null +++ b/docs/generated-html/cutlass__math_8h.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass_math.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
cutlass_math.h File Reference
+
+
+ +

Math utilities. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::is_pow2< N >
 
struct  cutlass::log2_down< N, CurrentVal, Count >
 
struct  cutlass::log2_down< N, 1, Count >
 
struct  cutlass::log2_up< N, CurrentVal, Count >
 
struct  cutlass::log2_up< N, 1, Count >
 
struct  cutlass::sqrt_est< N >
 
struct  cutlass::divide_assert< Dividend, Divisor >
 
+ + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + +

+Functions

template<typename dividend_t , typename divisor_t >
CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest (dividend_t dividend, divisor_t divisor)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::gcd (value_t a, value_t b)
 
template<typename value_t >
CUTLASS_HOST_DEVICE value_t cutlass::lcm (value_t a, value_t b)
 
+
+ + + + diff --git a/docs/generated-html/cutlass__math_8h_source.html b/docs/generated-html/cutlass__math_8h_source.html new file mode 100644 index 0000000000..2809a84568 --- /dev/null +++ b/docs/generated-html/cutlass__math_8h_source.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: cutlass_math.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass_math.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include <cutlass/util/platform.h>
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Static math utilities
39  ******************************************************************************/
40 
44 template <int N>
45 struct is_pow2 : platform::integral_constant<bool, (N & (N - 1)) == 0> {};
46 
50 template <int N, int CurrentVal = N, int Count = 0>
51 struct log2_down {
53  enum { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value };
54 };
55 
56 // Base case
57 template <int N, int Count>
58 struct log2_down<N, 1, Count> {
59  enum { value = Count };
60 };
61 
65 template <int N, int CurrentVal = N, int Count = 0>
66 struct log2_up {
68  enum { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value };
69 };
70 
71 // Base case
72 template <int N, int Count>
73 struct log2_up<N, 1, Count> {
74  enum { value = ((1 << Count) < N) ? Count + 1 : Count };
75 };
76 
80 template <int N>
81 struct sqrt_est {
82  enum { value = 1 << (log2_up<N>::value / 2) };
83 };
84 
89 template <int Dividend, int Divisor>
90 struct divide_assert {
91  enum { value = Dividend / Divisor };
92 
93  static_assert((Dividend % Divisor == 0), "Not an even multiple");
94 };
95 
96 /******************************************************************************
97  * Rounding
98  ******************************************************************************/
99 
103 template <typename dividend_t, typename divisor_t>
104 CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor) {
105  return ((dividend + divisor - 1) / divisor) * divisor;
106 }
107 
111 template <typename value_t>
112 CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b) {
113  for (;;) {
114  if (a == 0) return b;
115  b %= a;
116  if (b == 0) return a;
117  a %= b;
118  }
119 }
120 
124 template <typename value_t>
125 CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b) {
126  value_t temp = gcd(a, b);
127 
128  return temp ? (a / temp * b) : 0;
129 }
130 
131 } // namespace cutlass
Definition: cutlass_math.h:91
+
Definition: convert.h:33
+
Definition: cutlass_math.h:51
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
Definition: cutlass_math.h:53
+
CUTLASS_HOST_DEVICE value_t lcm(value_t a, value_t b)
Definition: cutlass_math.h:125
+
CUTLASS_HOST_DEVICE dividend_t round_nearest(dividend_t dividend, divisor_t divisor)
Definition: cutlass_math.h:104
+
Definition: cutlass_math.h:68
+
std::integral_constant
Definition: platform.h:274
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Definition: cutlass_math.h:82
+
CUTLASS_HOST_DEVICE value_t gcd(value_t a, value_t b)
Definition: cutlass_math.h:112
+
Definition: cutlass_math.h:90
+
Definition: cutlass_math.h:66
+
Definition: cutlass_math.h:45
+
Definition: cutlass_math.h:81
+
+ + + + diff --git a/docs/generated-html/debug_8h.html b/docs/generated-html/debug_8h.html new file mode 100644 index 0000000000..1f88396ab8 --- /dev/null +++ b/docs/generated-html/debug_8h.html @@ -0,0 +1,239 @@ + + + + + + + +Cutlass: debug.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
debug.h File Reference
+
+
+ +

Debugging and logging functionality. +More...

+
#include <stdio.h>
+
+

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + +

+Macros

#define CUDA_LOG(format, ...)   printf(format, __VA_ARGS__)
 
#define CUDA_LOG_DEBUG(format, ...)
 
#define CUDA_PERROR(e)   cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
 Perror macro. More...
 
#define CUDA_PERROR_EXIT(e)
 Perror macro with exit. More...
 
#define CUDA_PERROR_DEBUG(e)   (e)
 Perror macro only if DEBUG is defined. More...
 
+ + + + +

+Functions

__host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl (cudaError_t error, const char *filename, int line)
 The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
 
+

Macro Definition Documentation

+ +

◆ CUDA_LOG

+ +
+
+ + + + + + + + + + + + + + + + + + +
#define CUDA_LOG( format,
 ... 
)   printf(format, __VA_ARGS__)
+
+

Formats and prints the given message to stdout

+ +
+
+ +

◆ CUDA_LOG_DEBUG

+ +
+
+ + + + + + + + + + + + + + + + + + +
#define CUDA_LOG_DEBUG( format,
 ... 
)
+
+

Formats and prints the given message to stdout only if DEBUG is defined

+ +
+
+ +

◆ CUDA_PERROR

+ +
+
+ + + + + + + + +
#define CUDA_PERROR( e)   cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
+
+ +
+
+ +

◆ CUDA_PERROR_DEBUG

+ +
+
+ + + + + + + + +
#define CUDA_PERROR_DEBUG( e)   (e)
+
+ +
+
+ +

◆ CUDA_PERROR_EXIT

+ +
+
+ + + + + + + + +
#define CUDA_PERROR_EXIT( e)
+
+Value:
if (cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) { \
exit(1); \
}
__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, const char *filename, int line)
The corresponding error message is printed to stderr (or stdout in device code) along with the suppli...
Definition: debug.h:77
+
+
+
+
+ + + + diff --git a/docs/generated-html/debug_8h_source.html b/docs/generated-html/debug_8h_source.html new file mode 100644 index 0000000000..881b4e3f05 --- /dev/null +++ b/docs/generated-html/debug_8h_source.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: debug.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
debug.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
25 
26 #pragma once
27 
33 #include <stdio.h>
34 
35 namespace cutlass {
36 
37 /******************************************************************************
38  * Debug and logging macros
39  ******************************************************************************/
40 
/**
 * CUDA_LOG(format, ...): formats and prints a message to stdout.
 *
 * In host code this is a plain printf.  In device code (__CUDA_ARCH__ defined)
 * the message is prefixed with the calling thread's block and thread indices.
 * At least one argument must follow `format`, because __VA_ARGS__ is expanded
 * after a comma in both variants.
 *
 * Neither variant ends in a semicolon: the caller supplies it, so the macro
 * behaves like a single expression and can sit in an unbraced if/else.
 */
#if !defined(CUDA_LOG)
#if !defined(__CUDA_ARCH__)
#define CUDA_LOG(format, ...) printf(format, __VA_ARGS__)
#else
#define CUDA_LOG(format, ...)                                \
  printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format,   \
         blockIdx.x,                                         \
         blockIdx.y,                                         \
         blockIdx.z,                                         \
         threadIdx.x,                                        \
         threadIdx.y,                                        \
         threadIdx.z,                                        \
         __VA_ARGS__)
#endif
#endif
59 
/**
 * CUDA_LOG_DEBUG(format, ...): forwards to CUDA_LOG when DEBUG is defined and
 * expands to nothing otherwise, in which case the arguments are not evaluated.
 */
#if !defined(CUDA_LOG_DEBUG)
#if defined(DEBUG)
#define CUDA_LOG_DEBUG(format, ...) CUDA_LOG(format, __VA_ARGS__)
#else
#define CUDA_LOG_DEBUG(format, ...)
#endif
#endif
70 
77 __host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error,
78  const char* filename,
79  int line) {
80  (void)filename;
81  (void)line;
82  if (error) {
83 #if !defined(__CUDA_ARCH__)
84  fprintf(
85  stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error));
86  fflush(stderr);
87 #else
88  printf("CUDA error %d [%s, %d]\n", error, filename, line);
89 #endif
90  }
91  return error;
92 }
93 
/**
 * CUDA_PERROR(e): reports the CUDA error `e` (if non-zero) along with the
 * file and line of the macro invocation, and evaluates to the error code.
 *
 * The helper is named with its full namespace so the macro also expands
 * correctly when used outside of namespace cutlass.
 */
#ifndef CUDA_PERROR
#define CUDA_PERROR(e) ::cutlass::cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)
#endif
100 
/**
 * CUDA_PERROR_EXIT(e): reports the CUDA error `e` like CUDA_PERROR and calls
 * exit(1) when the code is non-zero.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement: it can
 * be followed by a semicolon and used in an unbraced if/else without
 * capturing the caller's `else`.  The helper is fully qualified so the macro
 * works outside of namespace cutlass.
 */
#ifndef CUDA_PERROR_EXIT
#define CUDA_PERROR_EXIT(e)                                                       \
  do {                                                                            \
    if (::cutlass::cuda_perror_impl((cudaError_t)(e), __FILE__, __LINE__)) {      \
      exit(1);                                                                    \
    }                                                                             \
  } while (0)
#endif
110 
/**
 * CUDA_PERROR_DEBUG(e): behaves like CUDA_PERROR(e) when DEBUG is defined;
 * otherwise it simply evaluates to (e) without reporting anything.
 */
#if !defined(CUDA_PERROR_DEBUG)
#if defined(DEBUG)
#define CUDA_PERROR_DEBUG(e) CUDA_PERROR(e)
#else
#define CUDA_PERROR_DEBUG(e) (e)
#endif
#endif
121 
122 } // namespace cutlass
Definition: convert.h:33
+
__host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl(cudaError_t error, const char *filename, int line)
The corresponding error message is printed to stderr (or stdout in device code) along with the suppli...
Definition: debug.h:77
+
+ + + + diff --git a/docs/generated-html/dgemm__traits_8h.html b/docs/generated-html/dgemm__traits_8h.html new file mode 100644 index 0000000000..eebc2f364c --- /dev/null +++ b/docs/generated-html/dgemm__traits_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: dgemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
dgemm_traits.h File Reference
+
+ + + + + diff --git a/docs/generated-html/dgemm__traits_8h_source.html b/docs/generated-html/dgemm__traits_8h_source.html new file mode 100644 index 0000000000..9cf2c8738a --- /dev/null +++ b/docs/generated-html/dgemm__traits_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: dgemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
dgemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/gemm/gemm.h>
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename AccumulatorsPerThread_,
49  int kScalarsPerLdgA_ = 1,
51  int kScalarsPerLdgB_ = 1>
53  : public GemmConfig<
55  double,
57  double,
59  double,
61  double,
63  OutputTile_,
65  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, double, double, double>,
67  kScalarsPerLdgA_,
69  kScalarsPerLdgA_,
71  2,
73  kScalarsPerLdgB_,
75  kScalarsPerLdgB_,
77  2,
79  1,
81  2,
83  1,
85  2> {};
86 
88 
89 template <
91  MatrixLayout::Kind kLayoutA_,
93  MatrixLayout::Kind kLayoutB_,
95  typename OutputTile_ = Shape<8, 64, 128>,
97  typename EpilogueFunctor_ = LinearScaling<double>,
99  typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
101  int kScalarsPerLdgA_ = 1,
103  int kScalarsPerLdgB_ = 1,
105  typename Index_ = int,
107  typename GemmConfig_ =
110  typename GemmEpilogueTraits_ =
113  // The layout for A.
114  kLayoutA_,
115  // The layout for B.
116  kLayoutB_,
117  // The config.
118  GemmConfig_,
119  // The epilogue.
120  GemmEpilogue<GemmEpilogueTraits_>,
121  // The index.
122  Index_> {};
123 
125 
126 } // namespace gemm
127 } // namespace cutlass
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Defines structural properties of complete GEMM computation.
+
Template implementing matrix multiply-add operations on fragments.
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: gemm_traits.h:79
+
Definition: dgemm_traits.h:112
+
Definition: dgemm_traits.h:52
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
Kind
Definition: matrix_traits.h:36
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Implements a software-pipelined efficient GEMM.
+
Defines structural properties of the GEMM epilogue.
+
Definition: gemm_traits.h:723
+
+ + + + diff --git a/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html b/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html new file mode 100644 index 0000000000..d7393ef13f --- /dev/null +++ b/docs/generated-html/dir_1417ee5ebebc309c36b7962f26a92c39.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: cutlass Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
cutlass Directory Reference
+
+
+ + + + + + +

+Directories

directory  gemm
 
directory  util
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Files

file  convert.h [code]
 Defines conversion operations among Fragments of different base type.
 
file  coord.h [code]
 A Coord is a coordinate of arbitrary rank into a tensor or matrix.
 
file  core_io.h [code]
 Helpers for printing cutlass/core objects.
 
file  cutlass.h [code]
 Basic include for CUTLASS macros.
 
file  fragment.h [code]
 Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers.
 
file  fragment_load_store.h [code]
 Defines accessors for loading and storing fragments to memory efficiently.
 
file  fragment_multiply_add.h [code]
 Defines multiply-add operations on fragments within a thread.
 
file  iterator_access.h [code]
 Free functions for loading and storing to implementations of tile iterator concepts.
 
file  load_store.h [code]
 Defines abstractions for efficiently loading and storing vectors to memory.
 
file  matrix_traits.h [code]
 Defines properties of matrices used to denote layout and operands to GEMM kernels.
 
file  predicate_vector.h [code]
 Defines container classes and iterators for managing a statically sized vector of boolean predicates.
 
file  reshape_tile.h [code]
 Defines a type for restructuring a tile.
 
file  shape.h [code]
 Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
 
file  tensor_ref.h [code]
 Defines a structure containing strides, bounds, and a pointer to tensor data.
 
file  tensor_view.h [code]
 Defines a structure containing strides and a pointer to tensor data.
 
file  tile_iterator.h [code]
 Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
 
file  tile_traits_standard.h [code]
 Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance.
 
file  vector.h [code]
 Defines a 1D vector of elements held in the registers of each thread.
 
file  wmma_matrix.h [code]
 Abstractions for loading and storing matrices using the CUDA WMMA API.
 
+
+ + + + diff --git a/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html b/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html new file mode 100644 index 0000000000..161267475b --- /dev/null +++ b/docs/generated-html/dir_18d6a367a3982a494d65599933fc67a3.html @@ -0,0 +1,178 @@ + + + + + + + +Cutlass: gemm Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm Directory Reference
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Files

file  clear_accumulators.h [code]
 Defines abstractions for efficiently clearing accumulator tiles.
 
file  dgemm_traits.h [code]
 Defines structural traits of double-precision GEMM.
 
file  gemm.h [code]
 Implements a software-pipelined efficient GEMM.
 
file  gemm_epilogue.h [code]
 Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product.
 
file  gemm_epilogue_traits.h [code]
 Defines structural properties of the GEMM epilogue.
 
file  gemm_global_stream.h [code]
 Implements efficient loading of the thread block-level tile from global memory and storing to shared memory.
 
file  gemm_global_tile.h [code]
 Defines iterators for efficiently loading and storing to global memory.
 
file  gemm_operand.h [code]
 Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory.
 
file  gemm_shared_stream.h [code]
 Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline.
 
file  gemm_shared_tile.h [code]
 Defines iterators for efficiently loading and storing tiles to and from shared memory.
 
file  gemm_traits.h [code]
 Defines structural properties of complete GEMM computation.
 
file  hgemm_global_tile.h [code]
 Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits.
 
file  hgemm_multiply_add.h [code]
 Specialization implementing multiply-add operation on half-precision floating point fragments.
 
file  hgemm_swizzle.h [code]
 Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands.
 
file  hgemm_traits.h [code]
 Defines structural properties of half-precision GEMM computation.
 
file  identity_block_swizzle.h [code]
 Defines functors for mapping blockIdx to partitions of the GEMM computation.
 
file  igemm_epilogue.h [code]
 Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats.
 
file  igemm_global_tile.h [code]
 Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory.
 
file  igemm_multiply_add.h [code]
 Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
 
file  igemm_swizzle.h [code]
 Transposes a fragment of data containing packed 8-bit integer elements.
 
file  igemm_traits.h [code]
 Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary.
 
file  linear_scaling.h [code]
 Implements the BLAS linear scaling function alpha*AB + beta*C.
 
file  sgemm_traits.h [code]
 Defines structural properties of single-precision GEMM.
 
file  thread_multiply_add.h [code]
 Template implementing matrix multiply-add operations on fragments.
 
file  wmma_gemm_epilogue_traits.h [code]
 Defines structural properties of WMMA GEMM's epilogue phase.
 
file  wmma_gemm_global_tile.h [code]
 Defines tile iterator traits for loading thread block-level tile from global memory.
 
file  wmma_gemm_multiply_add.h [code]
 Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API.
 
file  wmma_gemm_shared_tile.h [code]
 Defines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM.
 
file  wmma_gemm_traits.h [code]
 Defines structural properties of GEMM targeting WMMA API in CUDA.
 
+
+ + + + diff --git a/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html b/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html new file mode 100644 index 0000000000..a66eb22fa5 --- /dev/null +++ b/docs/generated-html/dir_c5917a9a879e9a6c73eaf5237444ab84.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: util Directory Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
util Directory Reference
+
+
+ + + + + + + + + + + +

+Files

file  cutlass_math.h [code]
 Math utilities.
 
file  debug.h [code]
 Debugging and logging functionality.
 
file  platform.h [code]
 C++ features that may be otherwise unimplemented for CUDA device functions.
 
+
+ + + + diff --git a/docs/generated-html/doc.png b/docs/generated-html/doc.png new file mode 100644 index 0000000000000000000000000000000000000000..3cbc5d3274e9cc4b77d3f2e889fc440261b0e249 GIT binary patch literal 751 zcmVb?}_^lyr!BQe12@`R8_s-n8iF3%#H zhEGTo{QBIKbDt;)_-*2%;??_Sym&Lt#LP>|yG06H%K&w7twF9ZlZ`n^ZBt=8C!-}6 z*%Zs`g#BWZbAqUozzN26u{Y!%wSh#ev#u7N7O4r!pzrx9ZA*9SHD;FH>10JhO8hD2?`U9c6%8cq@>_ z)n|JCH4CesI3CTA-}t7Znk&rGnH;Tl^&`wqg&bHs=Qr8`N+HY{Z z=N`SHV)XL3D7Z>Xv_lixfhd40JQXPa)v>rXt+TF@QY@`Xf?Bnj#kJ3poF#_)AL`Bq zaD%4;)m$n)#~RXP5*J0q7(>1H8OYow)pVLcxgI(HNLy*jeE@FoRPd+SrWT|Iq9P%? zUXcc?twBb{!+buZ(GF6iYjA3eGbe{!YoO!&I}iZ1aQ#Q6LQPUHBh}5y_mFxP$%C)C zp-tB&EQM7KcMt|k=2QUc;A&A-2<|=}%(~MCL7oif$h_8_|Gcq=V5oI!tjcj&8I?z_AMA=EGgef9ZQEJdAwuz4% zhLA%u{fJT`489o(!!R@SJJA)20+@g0pJY);EyBsU>N|^ih2O>8i*b?(aKFaaUiuQ zj{dw~Hf2SCeg8&#oa(w#~9^Grb1erKHDm0H6ob zchdOay=oi#;JxbkswJj!lFREq)Qn?bTkMy8#Zz4ua$XZb!OdDV_M1Fo7;Mak3lmrG zo|WzVD&v(m=))16=W?x*&$qMa*t!o|HSH+O-+fSmk~_H^ppu4h$FmC8kgO5;x!uFA zEdX6Vf9w{^^(C2njm_wx<1D$>iS1fpIofwN2B7O2JrmNasLeL5+tYeBtq;<~>BM_{ zr(rW8#-pb*6o$5WOnTO&Cz3?^a@POM-Nv?4o8IWqs+TCAe|^8~a^XeRVX}FzM>RUt zVyd|Jl;z@he}nou3i;wVGWMk@`V2LTBa(#rE+uGg&yU*qsl3zhb^|86s=e3N4kLT` z;KhsI0iYUq-fm>qVAl1YwGl1fZ4W@#S$~lAG>qf@f_x+0$fgI|l+PdNHeI$sTz~wm0?xBO#%xz_^zOumuBxS3`QwYw3$ z@a>=E`Y|Sauv}a}Q=E>2I~FU*cQGX@DBQfH*2YuT3hPb$m>^UC*UWpw!frtss3?#b|WLHk)#D;jK z=*7gopFAyx)E~FAbSQkim!^}hGwI5Pd}gT*oVE)9UJ31fh@b&lf|RrU?Ny)k579fV z-<*#`VPPRUckT@78n+>%duj<%uaAwCb!o>hht|RO$jpfpz8wbm&}n;LT)TAhC_HBS zWFOjd&Yo-pk8$nh=joWJ=XdUs@d#9|($m{Vu|zCMj-$^de3*7@{uY3)&saDNS1;bP zrnLgYwQ;TkasTcU>keI;7i?N_0D!KaEy=+CZ&QSCr=jkU?qlc4AjB?L2U4sN6l>0w zEn94*F@+{fK=!U;t{3G!2DOzf^-R*cfxfF9aQ^5mx>GQgjqRikkf#CAb>3?k<;sj~ zO7$c$n`j9tNZGc-qD2#PQ6TAv7q&&6T1lULMb!r~aslZ2FvmXpCv1Ad@^8fXUw(Z6 zxw`|(fBa=OkBwV4icN^gM5FpmMVVRtp9sk$o26tY+Gpuxw-L5n!gMyI@;83UheF}X ziI_+_Wu%k5A}Y|SM+XN{X4k18Yz)f^bX0Uz&2jirSD&tCV&Oa$*M##k7mZMj?Wa3v zmv0?~m%A?)bbi8Y4cE0cxpJje%|0uJE|ASw~K2(J-UuL 
z^ISMj*_0z$+Ge3#i7hN)5)OY|V$TE_k-+Wkn>bEQf}TCVrxQk0(5H!ZDr@&i0J=V6 zOn?4!c9`BZ+1+8YUf}*dVd*6AtdUDz9=S*_nK7BJ$lP+#nPk#w-HiMfhKEJLtcC7$ zN@EH6LV>(vw`P-kg2tv(ZGHom&vF`QJyo3Fdej@qQMvrQ$qeJru94k%vZ-`iI9%n_ zZ>hMRerhS=R_x~l8_!!ZQ90aJsM{KHkD-9-h_qQ#Jr(MZL}u=s1;POsG=>Swmhd@_?ye$8X6M*Jy^4fRO{06l1ASCZ@!1OK;oZr@|n zsDbXLeLizIqE~N1^Co6`b974ipe0AH3zcVW0C7Ge zHx%(Jvja(9NIuCaUnQfFQ?FhdBC~4qf!^(zHP@LF+?iy{4E(QOs8T2fyf;~#JNSJJ zhQ9AEj&m-jU`1ejJn4GWWPerS41Bxisy(e|zbsBB$WRvmUIj@WBWlaBmY~{V6A1cn zl!bb}%SN5@?Gy0x4%Q=R_gDJGb6XKFC?Gjl&>KZ zJkq6$pWCQt0MNt1{1~iP?T#Z^-TP9d<3u{iQk^GMyBbSsSKMB2>`S`jOuFeLuU;9w zL}q28OTzZ!en?yUod>PQzbsBB$WT`k7XZ8roWnT6quO$;CCJeJ>ZF-CzPD5$$inB9rFe#J+3xB$G|+j^sZz)ISD|>c0yB)v#%g5?RTYSCYKmGe^3&wis!8vc zUmxOgmq`_*Bh*onRm$hvsYdr&P%TzXB73+8I5-XY`%H0t&)IM!tUksB!BF?!&o|)a zI;)gEXOj=H!jm5(7B_0qD6DCVhPZyY+?vJ~wdF}m(2A%)nWc$P*Y$G51o60FV_b|y zvnK6CfxTx}A~E849)Az`q`mDtm$d+NecU9W*qTrvx7S8&(o1e|v>Ps*x+C0Qr3H z*002RL~#_XkBzdB%5l63g=+=L3D28ER!%-+eP*g^+$YB;-GjzoAlZo^T{D{U^9~wU z?H~g{P0;EyLH&lAtG%YI(jzFyH$o&RWR+5nptW0laq7Tz`+C~jdgDsP$~`C7_}TaK zM8o<`M8Og=6u&7E%NJKarQEhArIbx^nLK-0@QPTu$0+pfB-|_j(8HPHbR?|bi--50 z&gQ!IZnK7AGv!J=ul!w<$;w;9otB2V3^8*b}`^Sw*mC|L<4BRs#< zN`HEE&7$esTrR8H_w@8o+a_%G(gS$5$;Ly%1{Hzoc3?F(fEZ&I;TZO??`4k(LA$>*9kJ{nwCVaVo z$B%x&xg$4mv*bZM>yUiDMt-iwrVzZu=30b)I{;lL9figx*9@6vn#q=hMf%&eY1T!QP1+m6`*$Wg;xnnS!Fp`R$+~wd zG%;!*u907@J@y?>x}~-3h1cWg24&C>p*z)>9`wZqLzIg4++3EAR306;a6OHa|Fgp1Jf{%DJ433YwcV#`EWub{)Dk zuXFY8SxR;lLC=1Bj7JsEQ2x&+{2~k@a-2I^nG0*CmPRPObiY-}g?p7I#)p45e!Fcw z6Uoi1cyb4K%8O8R?R(@>9eH^FS?pU4>Xy)wJCQT!zeNrJy)Hg+U3#_pOFeOy&fZOa z_Vjs&ciyevUpq%9o(amO|ARC%r#*?!o3`bjxV`rG-<}U>+@yZ5pCA4z&Npma4`d5X zj2q&gHy`=YR1rZ*HU + + + + + + +Cutlass: File List + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
File List
+
+
+
Here is a list of all files with brief descriptions:
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 clear_accumulators.hDefines abstractions for efficiently clearing accumulator tiles
 convert.hDefines conversion operations among Fragments of different base type
 coord.hA Coord is a coordinate of arbitrary rank into a tensor or matrix
 core_io.hHelpers for printing cutlass/core objects
 cutlass.hBasic include for CUTLASS macros
 cutlass_math.hMath utilities
 debug.hDebugging and logging functionality
 dgemm_traits.hDefines structural traits of double-precision GEMM
 fragment.hDefines Fragment, a statically-sized array for storing parts of matrices within a thread's registers
 fragment_load_store.hDefines accessors for loading and storing fragments to memory efficiently
 fragment_multiply_add.hDefines multiply-add operations on fragments within a thread
 gemm.hImplements a software-pipelined efficient GEMM
 gemm_epilogue.hImplements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product
 gemm_epilogue_traits.hDefines structural properties of the GEMM epilogue
 gemm_global_stream.hImplements efficient loading of the thread block-level tile from global memory and storing to shared memory
 gemm_global_tile.hDefines iterators for efficiently loading and storing to global memory
 gemm_operand.hDefines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory
 gemm_shared_stream.hDefines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline
 gemm_shared_tile.hDefines iterators for efficiently loading and storing tiles to and from shared memory
 gemm_traits.hDefines structural properties of complete GEMM computation
 hgemm_global_tile.hTile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits
 hgemm_multiply_add.hSpecialization implementing multiply-add operation on half-precision floating point fragments
 hgemm_swizzle.hTransposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands
 hgemm_traits.hDefines structural properties of half-precision GEMM computation
 identity_block_swizzle.hDefines functors for mapping blockIdx to partitions of the GEMM computation
 igemm_epilogue.hDefines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point output matrix formats
 igemm_global_tile.hImplements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory
 igemm_multiply_add.hImplements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction
 igemm_swizzle.hTransposes a fragment of data containing packed 8-bit integer elements
 igemm_traits.hDefines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary
 iterator_access.hFree functions for loading and storing to implementations of tile iterator concepts
 linear_scaling.hImplements the BLAS linear scaling function alpha*AB + beta*C
 load_store.hDefines abstractions for efficiently loading and storing vectors to memory
 matrix_traits.hDefines properties of matrices used to denote layout and operands to GEMM kernels
 platform.hC++ features that may be otherwise unimplemented for CUDA device functions
 predicate_vector.hDefines container classes and iterators for managing a statically sized vector of boolean predicates
 reshape_tile.hDefines a type for restructuring a tile
 sgemm_traits.hDefines structural properties of single-precision GEMM
 shape.hDefines Shape implementing the Layout concept for representing a 4D hypercube of objects
 tensor_ref.hDefines a structure containing strides, bounds, and a pointer to tensor data
 tensor_view.hDefines a structure containing strides and a pointer to tensor data
 thread_multiply_add.hTemplate implementing matrix multiply-add operations on fragments
 tile_iterator.hDefines the Tile Traits concept and iterators for loading and storing to tiles efficiently
 tile_traits_standard.hDefines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance
 vector.hDefines a 1D vector of elements held in the registers of each thread
 wmma_gemm_epilogue_traits.hDefines structural properties of WMMA GEMM's epilogue phase
 wmma_gemm_global_tile.hDefines tile iterator traits for loading thread block-level tile from global memory
 wmma_gemm_multiply_add.hImplements warp-level matrix multiply-accumulate operation using CUDA WMMA API
 wmma_gemm_shared_tile.hDefines iterator traits for efficiently loading and storing fragment to and from shared memory, specialized for WMMA GEMM
 wmma_gemm_traits.hDefines structural properties of GEMM targeting WMMA API in CUDA
 wmma_matrix.hAbstractions for loading and storing matrices using the CUDA WMMA API
+
+
+ + + + diff --git a/docs/generated-html/folderclosed.png b/docs/generated-html/folderclosed.png new file mode 100644 index 0000000000000000000000000000000000000000..7a18333d6942ccac5f7645419008708ac2eb0ea7 GIT binary patch literal 649 zcmV;40(Sk0P)&8$Tzs`>W(9+g#j zTMZVG7t9G*W>r}=&)H~yeg?Zddu3gTzW@BHqSle=sJrg(Afq*!%g3dC*X<&m+5`Xz zBqX}~)Dg9@2RwJh?3F^3gy7?L-qxg!AxQW*B()sfCmEYq1Ox1b14O#CWk0q^>gvqg_+I zrB+b_lIZ=H=QvfrKA7Nk>!PTP>{l{)XA+)6X{G+MHUMHn5-fIJ;&kK2V1iqZF9_L^ zE=8(qz=p@1R9LfG3Y4hzKhvg7^)*D~dP4tB0Z@ z13d|XiquPu){+=$tY~(8Z#UEIHk;tV9t0=z`i0r71EXnj}9_d{sl8Fr3KN^@S z9Jvkv+o5eA)2FZQ%8ktix%7NnowJm(;}Zrpm5#vVAj!)bo5bn43z1CvGFX29*sa=! z{U~!(qHL-VL{4&yz%rY%kxp7<+sGvI1Pd9`*IO&7RZm4SiS5D9&L-8>*ou^wik7or zofMLR&b^RcHyZ%q-B+SVFRn?)+y`9`2sD~xcI1pBm+W*g~ZW2*OK| ze{Fxl;>laX3GOdlmS)BY^`LjICM)ci>#utBt-XeG3%7?8+*`UNbE%T=jO(u)D?QFu ztM&ae&M(}F5Da|DVF!bphck*J97s@az6H?R+|Y2t<8+q%9sopk-$SDZR-7&!6V<|; zsMv?ZgmqAq2~K{mD4C2I;gBp_v!XIy6O(jjPw}XjDIFIR + + + + + + +Cutlass: fragment.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment.h File Reference
+
+
+ +

Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers. +More...

+
#include <assert.h>
+#include <cutlass/shape.h>
+#include <cutlass/util/cutlass_math.h>
+#include <cutlass/vector.h>
+
+

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::StorageType< kAlignment_ >
 
struct  cutlass::StorageType< 4 >
 
struct  cutlass::StorageType< 2 >
 
struct  cutlass::StorageType< 1 >
 
struct  cutlass::Fragment< Element_, kElements_, kAlignment_ >
 A template defining Fragment Concept. More...
 
struct  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
 A template defining Fragment Iterator Concept. More...
 
struct  cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/fragment_8h_source.html b/docs/generated-html/fragment_8h_source.html new file mode 100644 index 0000000000..8006bbbdf4 --- /dev/null +++ b/docs/generated-html/fragment_8h_source.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: fragment.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <assert.h>
32 #include <cutlass/shape.h>
34 #include <cutlass/vector.h>
35 
36 namespace cutlass {
37 
39 
56 
73 
75 template <int kAlignment_>
76 struct StorageType {
77  typedef uint64_t Type;
78 };
79 template <>
80 struct StorageType<4> {
81  typedef uint32_t Type;
82 };
83 template <>
84 struct StorageType<2> {
85  typedef uint16_t Type;
86 };
87 template <>
88 struct StorageType<1> {
89  typedef uint8_t Type;
90 };
91 
93 
98 template <typename Element_, int kElements_, size_t kAlignment_ = 16>
99 struct Fragment : public AlignedStruct<kAlignment_> {
101  static_assert(kAlignment_ == 16 || kAlignment_ >= sizeof(Element_), "Alignment is too small");
103  static_assert(is_pow2<kAlignment_>::value, "Alignment must be a power of two");
104 
108  typedef Element_ Element;
110  static int const kElements = kElements_;
111 
113  CUTLASS_DEVICE void clear() {
114  // Avoid element-wise access for sub 32b element type
115  if (kAlignment_ >= 8 && (kElements * sizeof(Element)) % 8 == 0) {
116  uint64_t* ptr = reinterpret_cast<uint64_t*>(storage);
117  for (int i = 0; i < (kElements * sizeof(Element)) / 8; ++i) {
118  ptr[i] = uint64_t(0);
119  }
120  } else if (kAlignment_ >= 4 && (kElements * sizeof(Element)) % 4 == 0) {
121  uint32_t* ptr = reinterpret_cast<uint32_t*>(storage);
122  for (int i = 0; i < (kElements * sizeof(Element)) / 4; ++i) {
123  ptr[i] = uint32_t(0);
124  }
125  } else if (kAlignment_ >= 2 && (kElements * sizeof(Element)) % 2 == 0) {
126  uint16_t* ptr = reinterpret_cast<uint16_t*>(storage);
127  for (int i = 0; i < (kElements * sizeof(Element)) / 2; ++i) {
128  ptr[i] = uint16_t(0);
129  }
130  } else {
131  for (int i = 0; i < kElements; ++i) {
132  storage[i] = 0;
133  }
134  }
135  }
136 
138  CUTLASS_DEVICE Element& operator[](int i) {
139  assert(i < kElements_);
140  return reinterpret_cast<Element*>(storage)[i];
141  }
142 
144  CUTLASS_DEVICE Element const& operator[](int i) const {
145  assert(i < kElements_);
146  return reinterpret_cast<Element const*>(storage)[i];
147  }
148 
149  private:
152 
154  static int const kStorageCount =
155  (sizeof(Element_) * kElements_ + sizeof(StorageType) - 1) / sizeof(StorageType);
157  StorageType storage[kStorageCount];
158 
160  static_assert(sizeof(StorageType) <= kAlignment_, "StorageType is too big for given alignment");
161 };
162 
164 
169 template <typename Fragment_, typename Iterations_, typename AccessType_>
174  typedef Fragment_ Fragment;
176  typedef Iterations_ Iterations;
178  typedef AccessType_ AccessType;
179 
181  typedef typename Fragment::Element Element;
183  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
188 
190  template <typename OtherFragment_>
191  CUTLASS_DEVICE FragmentIterator(OtherFragment_& fragment, int offset = 0)
192  : pointer(reinterpret_cast<Element*>(&fragment[offset])) {
193  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
194  }
195 
197  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
198  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
199  return reinterpret_cast<AccessType const&>(pointer[imm]);
200  }
201 
203  CUTLASS_DEVICE AccessType& at(int d, int h, int w, int c = 0) {
204  int const imm = ComputeOffsetFromStrides<Strides>::get(d, h, w, c);
205  return reinterpret_cast<AccessType&>(pointer[imm]);
206  }
207 
209  CUTLASS_DEVICE AccessType const& operator[](int i) const {
210  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
211  }
212 
214  CUTLASS_DEVICE AccessType& operator[](int i) {
215  return reinterpret_cast<AccessType&>(pointer[i * kElementsPerAccess]);
216  }
217 
219  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
220 
223 };
224 
226 
227 template <typename Fragment_, typename Iterations_, typename AccessType_>
232  typedef Fragment_ Fragment;
234  typedef Iterations_ Iterations;
236  typedef AccessType_ AccessType;
237 
239  typedef typename Fragment::Element Element;
241  static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element));
246 
248  template <typename OtherFragment_>
249  CUTLASS_DEVICE FragmentConstIterator(OtherFragment_& fragment, int offset = 0)
250  : pointer(reinterpret_cast<Element const*>(&fragment[offset])) {
251  static_assert(OtherFragment_::kElements >= Fragment::kElements, "");
252  }
254  CUTLASS_DEVICE FragmentConstIterator(
256  : pointer(reinterpret_cast<Element const*>(rhs_.offset)) {}
257 
259  CUTLASS_DEVICE AccessType const& at(int d, int h, int w, int c = 0) const {
260  int const imm = ComputeOffsetFromStrides<IterationsStrides>::get(d, h, w, c);
261  return reinterpret_cast<AccessType const&>(pointer[imm]);
262  }
263 
265  CUTLASS_DEVICE AccessType const& operator[](int i) const {
266  return reinterpret_cast<AccessType const&>(pointer[i * kElementsPerAccess]);
267  }
268 
270  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
271 
273  Element const* pointer;
274 };
275 
277 
278 } // namespace cutlass
CUTLASS_DEVICE void clear()
Clear a fragment.
Definition: fragment.h:113
+
Definition: convert.h:33
+
CUTLASS_DEVICE Element & operator[](int i)
The accessor.
Definition: fragment.h:138
+
CUTLASS_DEVICE AccessType & at(int d, int h, int w, int c=0)
The accessor.
Definition: fragment.h:203
+
Definition: vector.h:41
+
Definition: fragment.h:228
+
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:265
+
Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
Definition: shape.h:155
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Fragment::Element Element
The element.
Definition: fragment.h:181
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:241
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:174
+
Fragment_ Fragment
The fragment.
Definition: fragment.h:232
+
CUTLASS_DEVICE AccessType & operator[](int i)
The accessor.
Definition: fragment.h:214
+
Fragment::Element Element
The element.
Definition: fragment.h:239
+
ShapeStrides< FragmentShape >::Shape IterationsStrides
The linear strides for iterations.
Definition: fragment.h:245
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:270
+
CUTLASS_DEVICE FragmentIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:191
+
Fragment< Element_, kElements_ > This_
Make sure the alignment makes sense wrt the size of elements.
Definition: fragment.h:101
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:172
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the fragment.
Definition: fragment.h:243
+
Math utilities.
+
Definition: fragment.h:76
+
uint32_t Type
Definition: fragment.h:81
+
uint8_t Type
Definition: fragment.h:89
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:211
+
Element * pointer
The pointer.
Definition: fragment.h:222
+
AccessType_ AccessType
The access type.
Definition: fragment.h:236
+
Definition: shape.h:118
+
ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
The shape of the fragment.
Definition: fragment.h:185
+
A template defining Fragment Iterator Concept.
Definition: fragment.h:170
+
static int const kElements
The number of elements.
Definition: fragment.h:110
+
CUTLASS_DEVICE Element const & operator[](int i) const
The accessor.
Definition: fragment.h:144
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:234
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Iterations_ Iterations
The number of iterations.
Definition: fragment.h:176
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:259
+
Element_ Element
The element.
Definition: fragment.h:108
+
FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
This class.
Definition: fragment.h:230
+
CUTLASS_DEVICE AccessType const & operator[](int i) const
The accessor.
Definition: fragment.h:209
+
uint16_t Type
Definition: fragment.h:85
+
Defines a 1D vector of elements held in the registers of each thread.
+
CUTLASS_DEVICE FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
Create from non-constant FragmentIterator.
Definition: fragment.h:254
+
static int const kElementsPerAccess
The number of elements per access.
Definition: fragment.h:183
+
ShapeStrides< FragmentShape >::Shape Strides
The linear strides for iterations.
Definition: fragment.h:187
+
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
+
AccessType_ AccessType
The access type.
Definition: fragment.h:178
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: fragment.h:219
+
uint64_t Type
Definition: fragment.h:77
+
Definition: cutlass_math.h:45
+
CUTLASS_DEVICE FragmentConstIterator(OtherFragment_ &fragment, int offset=0)
Ctor.
Definition: fragment.h:249
+
CUTLASS_DEVICE AccessType const & at(int d, int h, int w, int c=0) const
The accessor.
Definition: fragment.h:197
+
Element const * pointer
The pointer.
Definition: fragment.h:273
+
+ + + + diff --git a/docs/generated-html/fragment__load__store_8h.html b/docs/generated-html/fragment__load__store_8h.html new file mode 100644 index 0000000000..1c92a6840c --- /dev/null +++ b/docs/generated-html/fragment__load__store_8h.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: fragment_load_store.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_load_store.h File Reference
+
+ + + + + diff --git a/docs/generated-html/fragment__load__store_8h_source.html b/docs/generated-html/fragment__load__store_8h_source.html new file mode 100644 index 0000000000..db877fbdef --- /dev/null +++ b/docs/generated-html/fragment__load__store_8h_source.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: fragment_load_store.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_load_store.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/load_store.h>
31 #include <cutlass/vector.h>
32 
33 namespace cutlass {
34 
36 
37 template <IteratorFragment::Kind kIteratorFragment,
38  int kAccessSize,
39  typename Scalar_,
40  MemorySpace::Kind Memory_,
41  typename FragmentElement_,
42  int kStride>
43 struct FragmentLoad {};
44 
45 template <int kAccessSize,
46  typename Scalar_,
47  MemorySpace::Kind Memory_,
48  typename FragmentElement_,
49  int kStride>
50 struct FragmentLoad<IteratorFragment::kWmmaMatrix,
51  kAccessSize,
52  Scalar_,
53  Memory_,
54  FragmentElement_,
55  kStride> {
57  typedef FragmentElement_ AccessType;
58 
60  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
61  value.load(&pointer[offset], kStride);
62  }
63 };
64 
65 template <int kAccessSize,
66  typename Scalar_,
67  MemorySpace::Kind Memory_,
68  typename FragmentElement_,
69  int kStride>
71  kAccessSize,
72  Scalar_,
73  Memory_,
74  FragmentElement_,
75  kStride> {
78 
80  static CUTLASS_DEVICE void load(AccessType& value, Scalar_ const* pointer, int offset) {
81  Load<Scalar_, kAccessSize, Memory_>::load(value, pointer, offset);
82  }
83 };
84 
85 template <IteratorFragment::Kind kIteratorFragment,
86  int kAccessSize,
87  typename Scalar_,
88  MemorySpace::Kind Memory_,
89  typename FragmentElement_,
90  int kStride>
91 struct FragmentStore {};
92 
93 template <int kAccessSize,
94  typename Scalar_,
95  MemorySpace::Kind Memory_,
96  typename FragmentElement_,
97  int kStride>
98 struct FragmentStore<IteratorFragment::kWmmaMatrix,
99  kAccessSize,
100  Scalar_,
101  Memory_,
102  FragmentElement_,
103  kStride> {
105  typedef FragmentElement_ AccessType;
106 
108  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
109  value.store(&pointer[offset], kStride);
110  }
111 };
112 
113 template <int kAccessSize,
114  typename Scalar_,
115  MemorySpace::Kind Memory_,
116  typename FragmentElement_,
117  int kStride>
119  kAccessSize,
120  Scalar_,
121  Memory_,
122  FragmentElement_,
123  kStride> {
126 
128  static CUTLASS_DEVICE void store(AccessType const& value, Scalar_* pointer, int offset) {
129  Store<Scalar_, kAccessSize, Memory_>::store(value, pointer, offset);
130  }
131 };
132 
134 
135 }
Definition: fragment_load_store.h:43
+
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: fragment_load_store.h:125
+
Definition: convert.h:33
+ +
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: fragment_load_store.h:77
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
static CUTLASS_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
The store function.
Definition: fragment_load_store.h:108
+
static CUTLASS_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
The store function.
Definition: fragment_load_store.h:128
+
Kind
Definition: load_store.h:40
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
+
Kind
Definition: tile_iterator.h:67
+
static CUTLASS_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
The load function.
Definition: fragment_load_store.h:80
+
Defines abstractions for efficiently loading and storing vectors to memory.
+
Definition: vector.h:61
+
Defines a 1D vector of elements held in the registers of each thread.
+ +
Definition: fragment_load_store.h:91
+
static CUTLASS_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
The load function.
Definition: fragment_load_store.h:60
+
Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix.
Definition: tile_iterator.h:66
+
+ + + + diff --git a/docs/generated-html/fragment__multiply__add_8h.html b/docs/generated-html/fragment__multiply__add_8h.html new file mode 100644 index 0000000000..59a94dfdff --- /dev/null +++ b/docs/generated-html/fragment__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: fragment_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_multiply_add.h File Reference
+
+
+ +

Defines multiply-add operations on fragments within a thread. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + + + +

+Classes

struct  cutlass::gemm::FragmentMultiplyAdd< Scalar_ >
 
struct  cutlass::gemm::FragmentMultiplyAdd< half >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/fragment__multiply__add_8h_source.html b/docs/generated-html/fragment__multiply__add_8h_source.html new file mode 100644 index 0000000000..9b453fd942 --- /dev/null +++ b/docs/generated-html/fragment__multiply__add_8h_source.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: fragment_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/fragment.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_>
42  typedef Scalar_ ScalarA;
44  typedef Scalar_ ScalarB;
46  typedef Scalar_ ScalarC;
47 
49  CUTLASS_DEVICE FragmentMultiplyAdd() {}
50 
52  template <typename Fragment_>
53  CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const& b, Fragment_& d) {
54  for (int j = 0; j < Fragment_::kElements; ++j) {
55  d[j] = a * b[j];
56  }
57  }
58 
60  template <typename Fragment_>
61  CUTLASS_DEVICE void multiply_add(Scalar_ a,
62  Fragment_ const& b,
63  Fragment_ const& c,
64  Fragment_& d) {
65  for (int j = 0; j < Fragment_::kElements; ++j) {
66  d[j] = a * b[j] + c[j];
67  }
68  }
69 };
70 
72 
73 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
74 template <>
75 struct FragmentMultiplyAdd<half> {
79  typedef half ScalarA;
81  typedef half ScalarB;
83  typedef half ScalarC;
84 
86  CUTLASS_DEVICE FragmentMultiplyAdd() {}
87 
89  template <typename Fragment_>
90  CUTLASS_DEVICE void multiply(half a, Fragment_ const& b, Fragment_& d) {
91 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
92  // The input.
93  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
94  // The output.
95  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
96 
97  // Assemble a half2 from a.
98  __half2 const a_half2 = __half2half2(a);
99 
100  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
101  d_half2[i] = __hmul2(a_half2, b_half2[i]);
102  }
103 #endif
104  }
105 
107  template <typename Fragment_>
108  CUTLASS_DEVICE void multiply_add(half a, Fragment_ const& b, Fragment_ const& c, Fragment_& d) {
109 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
110  // The inputs.
111  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
112  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
113  // The output.
114  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
115 
116  // Assemble a half2 from a.
117  __half2 const a_half2 = __half2half2(a);
118 
119  for (int i = 0; i < Fragment_::kElements / 2; ++i) {
120  d_half2[i] = __hfma2(a_half2, b_half2[i], c_half2[i]);
121  }
122 #endif
123  }
124 };
125 
126 #endif
127 
129 
130 } // namespace gemm
131 } // namespace cutlass
Scalar_ ScalarB
The type for B.
Definition: fragment_multiply_add.h:44
+
Definition: convert.h:33
+
CUTLASS_DEVICE void multiply(Scalar_ a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:53
+
half ScalarA
The type for A.
Definition: fragment_multiply_add.h:79
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:86
+
CUTLASS_DEVICE void multiply_add(Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:61
+
half ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:83
+
CUTLASS_DEVICE void multiply_add(half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
Multiply : d = a*b + c.
Definition: fragment_multiply_add.h:108
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:40
+
Scalar_ ScalarC
The type for C and D.
Definition: fragment_multiply_add.h:46
+
Scalar_ ScalarA
The type for A.
Definition: fragment_multiply_add.h:42
+
CUTLASS_DEVICE FragmentMultiplyAdd()
Ctor.
Definition: fragment_multiply_add.h:49
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
CUTLASS_DEVICE void multiply(half a, Fragment_ const &b, Fragment_ &d)
Multiply : d = a*b.
Definition: fragment_multiply_add.h:90
+
Shape< 1, 1, 1, 1 > InstructionShape
The shape of the instruction.
Definition: fragment_multiply_add.h:77
+
half ScalarB
The type for B.
Definition: fragment_multiply_add.h:81
+
Definition: fragment_multiply_add.h:38
+
+ + + + diff --git a/docs/generated-html/fragment__stream_8h.html b/docs/generated-html/fragment__stream_8h.html new file mode 100644 index 0000000000..7c8ab1e568 --- /dev/null +++ b/docs/generated-html/fragment__stream_8h.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: fragment_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
fragment_stream.h File Reference
+
+
+ +

An abstraction for implementing a stream loading a tile and storing a tile using a pair of tile iterators. +More...

+ +

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
 Manages a pair of iterators to stream data from global memory to shared. More...
 
struct  cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params
 Parameters passed to initialize the iterator. More...
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/fragment__stream_8h_source.html b/docs/generated-html/fragment__stream_8h_source.html new file mode 100644 index 0000000000..0810ad05cf --- /dev/null +++ b/docs/generated-html/fragment__stream_8h_source.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: fragment_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
fragment_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/convert.h>
32 #include <cutlass/fragment.h>
34 #include <cutlass/tensor_ref.h>
35 #include <cutlass/tile_iterator.h>
36 
39 #include <cutlass/matrix_traits.h>
40 
41 namespace cutlass {
42 
44 
46 template <typename Traits_, typename LoadIterator_, typename StoreIterator_,
47  typename Convert_ =
48  FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>,
49  typename Index_ = int>
51  //
52  // Constant and type definitions
53  //
54 
56  typedef Traits_ Traits;
57 
59  typedef LoadIterator_ LoadIterator;
60 
62  typedef StoreIterator_ StoreIterator;
63 
65  typedef Convert_ Convert;
66 
68  typedef Index_ Index;
69 
70  //
71  // Dependent types
72  //
73 
75  typedef typename LoadIterator::Fragment Fragment;
76 
79 
81  typedef typename StoreIterator::Storage Storage;
82 
85 
86  //
87  // Nested classes
88  //
89 
91  struct Params {
93  typedef typename LoadIterator::Params LoadParams;
94 
96  typedef typename StoreIterator::Params StoreParams;
97 
98  //
99  // Data members
100  //
101 
104 
107 
108  //
109  // Methods
110  //
111 
113  CUTLASS_HOST_DEVICE int initialize(LoadParams const &_load_params,
114  StoreParams const &_store_params) {
115  load_params = _load_params;
116  store_params = _store_params;
117  return 0;
118  }
119  };
120 
121  //
122  // Data members
123  //
124 
127 
129  typename LoadIterator::PredicateVector predicates;
130 
133 
136 
139 
140  //
141  // Static members
142  //
143 
145  static CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); }
146 
147  //
148  // Methods
149  //
150 
151  CUTLASS_DEVICE
153 
155  CUTLASS_DEVICE
156  FragmentStream(Params const &params, Coord<3> const &bounds,
157  Coord<3> const &block_offset = make_Coord(0, 0, 0))
158  : load_iterator(params.load_params, block_offset), store_iterator(params.store_params) {
159  // set predicates
160  initialize_predicates(bounds, block_offset);
161 
162  fetch.clear();
163  }
164 
166  CUTLASS_DEVICE
167  void load() {
168  ConstPredicateTileAdapter<typename LoadIterator::PredicateVector,
169  typename LoadIterator::Iterations>
170  predicates_it(predicates);
172  }
173 
175  CUTLASS_DEVICE
176  void commit() {
177  StoreFragment store_fragment(convert(fetch));
178 
179  iterator_store(store_iterator, store_fragment);
180  }
181 
183  CUTLASS_DEVICE
184  void initialize_predicates(Coord<3> const &bounds, Coord<3> const &block_offset) {
186  predicates_it(predicates);
187  load_iterator.initialize_predicates(predicates_it, bounds, block_offset);
188  }
189 };
190 
192 
193 } // namespace cutlass
StoreIterator store_iterator
Stores fragment to shared memory.
Definition: fragment_stream.h:132
+ +
Definition: convert.h:34
+
Defines a structure containing strides, bounds, and a pointer to tensor data.
+
StoreParams store_params
Parameters to the store iterator.
Definition: fragment_stream.h:106
+
LoadIterator::PredicateVector predicates
Predicate vector.
Definition: fragment_stream.h:129
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
Defines structural properties of complete GEMM computation.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
StoreIterator::Storage Storage
Destination storage.
Definition: fragment_stream.h:81
+
CUTLASS_DEVICE FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
Constructor.
Definition: fragment_stream.h:156
+
Fragment fetch
Fragment fetched by load iterator.
Definition: fragment_stream.h:135
+
CUTLASS_DEVICE void commit()
Commits the fragment.
Definition: fragment_stream.h:176
+
StoreIterator_ StoreIterator
Defines the store iterator.
Definition: fragment_stream.h:62
+
Adapter to enable random access to predicates via logical coordinate within a tile.
Definition: predicate_vector.h:435
+
Index_ Index
Index type.
Definition: fragment_stream.h:68
+
Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
The storage.
Definition: tile_iterator.h:181
+ +
static CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: fragment_stream.h:145
+
Free functions for loading and storing to implementations of tile iterator concepts.
+ +
CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Loads a fragment from an input iterator, masked by a predicate iterator.
Definition: iterator_access.h:113
+
StoreIterator::Storage SharedStoreStorage
The storage in shared memory.
Definition: fragment_stream.h:84
+
CUTLASS_DEVICE void initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
Recomputes predicates.
Definition: fragment_stream.h:184
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Manages a pair of iterators to stream data from global memory to shared.
Definition: fragment_stream.h:50
+
CUTLASS_DEVICE FragmentStream()
Definition: fragment_stream.h:152
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:37
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:174
+
LoadIterator::Params LoadParams
Load parameters.
Definition: fragment_stream.h:93
+
Convert_ Convert
Converts between tiles.
Definition: fragment_stream.h:65
+
StoreIterator::Fragment StoreFragment
Stored fragment type.
Definition: fragment_stream.h:78
+
Traits_ Traits
Defines traits of WMMA GEMM tile stream.
Definition: fragment_stream.h:56
+
CUTLASS_DEVICE void load()
Loads the fragment.
Definition: fragment_stream.h:167
+
StoreIterator::Params StoreParams
Store parameters.
Definition: fragment_stream.h:96
+
Adapter to enable random access to predicates via logical coordinate within a tile.
Definition: predicate_vector.h:466
+
LoadIterator::Fragment Fragment
Loaded fragment type.
Definition: fragment_stream.h:75
+
LoadIterator_ LoadIterator
Defines the load iterator.
Definition: fragment_stream.h:59
+
CUTLASS_HOST_DEVICE int initialize(LoadParams const &_load_params, StoreParams const &_store_params)
Initializes parameters.
Definition: fragment_stream.h:113
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
LoadParams load_params
Parameters to load iterator.
Definition: fragment_stream.h:103
+
Convert convert
Converts between load fragments and store fragments.
Definition: fragment_stream.h:138
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
LoadIterator load_iterator
Loads fragment from global memory.
Definition: fragment_stream.h:126
+
Defines conversion operations among Fragments of different base type.
+ +
Parameters passed to initialize the iterator.
Definition: fragment_stream.h:91
+
+ + + + diff --git a/docs/generated-html/functions.html b/docs/generated-html/functions.html new file mode 100644 index 0000000000..e6b156fbce --- /dev/null +++ b/docs/generated-html/functions.html @@ -0,0 +1,149 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- a -

+
+ + + + diff --git a/docs/generated-html/functions_0x7e.html b/docs/generated-html/functions_0x7e.html new file mode 100644 index 0000000000..41aa664c47 --- /dev/null +++ b/docs/generated-html/functions_0x7e.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- ~ -

+
+ + + + diff --git a/docs/generated-html/functions_b.html b/docs/generated-html/functions_b.html new file mode 100644 index 0000000000..79038aa18d --- /dev/null +++ b/docs/generated-html/functions_b.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- b -

+
+ + + + diff --git a/docs/generated-html/functions_c.html b/docs/generated-html/functions_c.html new file mode 100644 index 0000000000..9da7dd064e --- /dev/null +++ b/docs/generated-html/functions_c.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- c -

+
+ + + + diff --git a/docs/generated-html/functions_d.html b/docs/generated-html/functions_d.html new file mode 100644 index 0000000000..ca73b9191c --- /dev/null +++ b/docs/generated-html/functions_d.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- d -

+
+ + + + diff --git a/docs/generated-html/functions_e.html b/docs/generated-html/functions_e.html new file mode 100644 index 0000000000..ee616a11c7 --- /dev/null +++ b/docs/generated-html/functions_e.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- e -

+
+ + + + diff --git a/docs/generated-html/functions_enum.html b/docs/generated-html/functions_enum.html new file mode 100644 index 0000000000..b710de0fe1 --- /dev/null +++ b/docs/generated-html/functions_enum.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: Class Members - Enumerations + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_eval.html b/docs/generated-html/functions_eval.html new file mode 100644 index 0000000000..40c01ec853 --- /dev/null +++ b/docs/generated-html/functions_eval.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: Class Members - Enumerator + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- a -

+ + +

- k -

+ + +

- m -

+ + +

- v -

+
+ + + + diff --git a/docs/generated-html/functions_f.html b/docs/generated-html/functions_f.html new file mode 100644 index 0000000000..e2a60d90a2 --- /dev/null +++ b/docs/generated-html/functions_f.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- f -

+
+ + + + diff --git a/docs/generated-html/functions_func.html b/docs/generated-html/functions_func.html new file mode 100644 index 0000000000..0a425c0a11 --- /dev/null +++ b/docs/generated-html/functions_func.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_0x7e.html b/docs/generated-html/functions_func_0x7e.html new file mode 100644 index 0000000000..bf8de83a89 --- /dev/null +++ b/docs/generated-html/functions_func_0x7e.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- ~ -

+
+ + + + diff --git a/docs/generated-html/functions_func_b.html b/docs/generated-html/functions_func_b.html new file mode 100644 index 0000000000..5d533d802e --- /dev/null +++ b/docs/generated-html/functions_func_b.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_c.html b/docs/generated-html/functions_func_c.html new file mode 100644 index 0000000000..19541d873f --- /dev/null +++ b/docs/generated-html/functions_func_c.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- c -

+
+ + + + diff --git a/docs/generated-html/functions_func_d.html b/docs/generated-html/functions_func_d.html new file mode 100644 index 0000000000..4c1c062a01 --- /dev/null +++ b/docs/generated-html/functions_func_d.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_e.html b/docs/generated-html/functions_func_e.html new file mode 100644 index 0000000000..89f2b82a27 --- /dev/null +++ b/docs/generated-html/functions_func_e.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_f.html b/docs/generated-html/functions_func_f.html new file mode 100644 index 0000000000..a614ede1b7 --- /dev/null +++ b/docs/generated-html/functions_func_f.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_g.html b/docs/generated-html/functions_func_g.html new file mode 100644 index 0000000000..b302373663 --- /dev/null +++ b/docs/generated-html/functions_func_g.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_func_h.html b/docs/generated-html/functions_func_h.html new file mode 100644 index 0000000000..7eb85aac8d --- /dev/null +++ b/docs/generated-html/functions_func_h.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- h -

+
+ + + + diff --git a/docs/generated-html/functions_func_i.html b/docs/generated-html/functions_func_i.html new file mode 100644 index 0000000000..16cfdc5180 --- /dev/null +++ b/docs/generated-html/functions_func_i.html @@ -0,0 +1,163 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_func_l.html b/docs/generated-html/functions_func_l.html new file mode 100644 index 0000000000..c76f9fc530 --- /dev/null +++ b/docs/generated-html/functions_func_l.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_m.html b/docs/generated-html/functions_func_m.html new file mode 100644 index 0000000000..2c68ec4f36 --- /dev/null +++ b/docs/generated-html/functions_func_m.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_o.html b/docs/generated-html/functions_func_o.html new file mode 100644 index 0000000000..fb7b39f737 --- /dev/null +++ b/docs/generated-html/functions_func_o.html @@ -0,0 +1,193 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_func_p.html b/docs/generated-html/functions_func_p.html new file mode 100644 index 0000000000..8f1b5e8a84 --- /dev/null +++ b/docs/generated-html/functions_func_p.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_r.html b/docs/generated-html/functions_func_r.html new file mode 100644 index 0000000000..96a3353379 --- /dev/null +++ b/docs/generated-html/functions_func_r.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_s.html b/docs/generated-html/functions_func_s.html new file mode 100644 index 0000000000..197958afab --- /dev/null +++ b/docs/generated-html/functions_func_s.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_func_t.html b/docs/generated-html/functions_func_t.html new file mode 100644 index 0000000000..13b5b7ab28 --- /dev/null +++ b/docs/generated-html/functions_func_t.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_u.html b/docs/generated-html/functions_func_u.html new file mode 100644 index 0000000000..2ca7d7a113 --- /dev/null +++ b/docs/generated-html/functions_func_u.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- u -

+
+ + + + diff --git a/docs/generated-html/functions_func_v.html b/docs/generated-html/functions_func_v.html new file mode 100644 index 0000000000..ee4e6108be --- /dev/null +++ b/docs/generated-html/functions_func_v.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_func_w.html b/docs/generated-html/functions_func_w.html new file mode 100644 index 0000000000..ef637faa0f --- /dev/null +++ b/docs/generated-html/functions_func_w.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Functions + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- w -

+
+ + + + diff --git a/docs/generated-html/functions_g.html b/docs/generated-html/functions_g.html new file mode 100644 index 0000000000..9493c5ce48 --- /dev/null +++ b/docs/generated-html/functions_g.html @@ -0,0 +1,231 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_h.html b/docs/generated-html/functions_h.html new file mode 100644 index 0000000000..4c7693dbf0 --- /dev/null +++ b/docs/generated-html/functions_h.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- h -

+
+ + + + diff --git a/docs/generated-html/functions_i.html b/docs/generated-html/functions_i.html new file mode 100644 index 0000000000..a91cf4c4ec --- /dev/null +++ b/docs/generated-html/functions_i.html @@ -0,0 +1,268 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_k.html b/docs/generated-html/functions_k.html new file mode 100644 index 0000000000..1aab81d80e --- /dev/null +++ b/docs/generated-html/functions_k.html @@ -0,0 +1,376 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- k -

+
+ + + + diff --git a/docs/generated-html/functions_l.html b/docs/generated-html/functions_l.html new file mode 100644 index 0000000000..441d9d32ea --- /dev/null +++ b/docs/generated-html/functions_l.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- l -

+
+ + + + diff --git a/docs/generated-html/functions_m.html b/docs/generated-html/functions_m.html new file mode 100644 index 0000000000..1a9fe80954 --- /dev/null +++ b/docs/generated-html/functions_m.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- m -

+
+ + + + diff --git a/docs/generated-html/functions_n.html b/docs/generated-html/functions_n.html new file mode 100644 index 0000000000..bff5fbb328 --- /dev/null +++ b/docs/generated-html/functions_n.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_o.html b/docs/generated-html/functions_o.html new file mode 100644 index 0000000000..b79e0bd1b8 --- /dev/null +++ b/docs/generated-html/functions_o.html @@ -0,0 +1,213 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_p.html b/docs/generated-html/functions_p.html new file mode 100644 index 0000000000..9131d0117a --- /dev/null +++ b/docs/generated-html/functions_p.html @@ -0,0 +1,164 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_r.html b/docs/generated-html/functions_r.html new file mode 100644 index 0000000000..79d3a2e979 --- /dev/null +++ b/docs/generated-html/functions_r.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_s.html b/docs/generated-html/functions_s.html new file mode 100644 index 0000000000..0f0af76a37 --- /dev/null +++ b/docs/generated-html/functions_s.html @@ -0,0 +1,383 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_t.html b/docs/generated-html/functions_t.html new file mode 100644 index 0000000000..1b83a09191 --- /dev/null +++ b/docs/generated-html/functions_t.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- t -

+
+ + + + diff --git a/docs/generated-html/functions_type.html b/docs/generated-html/functions_type.html new file mode 100644 index 0000000000..2241f00f3d --- /dev/null +++ b/docs/generated-html/functions_type.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- a -

+
+ + + + diff --git a/docs/generated-html/functions_type_b.html b/docs/generated-html/functions_type_b.html new file mode 100644 index 0000000000..e092b1a8f1 --- /dev/null +++ b/docs/generated-html/functions_type_b.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- b -

+
+ + + + diff --git a/docs/generated-html/functions_type_c.html b/docs/generated-html/functions_type_c.html new file mode 100644 index 0000000000..e797657a52 --- /dev/null +++ b/docs/generated-html/functions_type_c.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_d.html b/docs/generated-html/functions_type_d.html new file mode 100644 index 0000000000..827859c229 --- /dev/null +++ b/docs/generated-html/functions_type_d.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- d -

+
+ + + + diff --git a/docs/generated-html/functions_type_e.html b/docs/generated-html/functions_type_e.html new file mode 100644 index 0000000000..cac932d07e --- /dev/null +++ b/docs/generated-html/functions_type_e.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_f.html b/docs/generated-html/functions_type_f.html new file mode 100644 index 0000000000..a71defeb3a --- /dev/null +++ b/docs/generated-html/functions_type_f.html @@ -0,0 +1,141 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- f -

+
+ + + + diff --git a/docs/generated-html/functions_type_g.html b/docs/generated-html/functions_type_g.html new file mode 100644 index 0000000000..4ae366f319 --- /dev/null +++ b/docs/generated-html/functions_type_g.html @@ -0,0 +1,183 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- g -

+
+ + + + diff --git a/docs/generated-html/functions_type_i.html b/docs/generated-html/functions_type_i.html new file mode 100644 index 0000000000..6be0ee90e5 --- /dev/null +++ b/docs/generated-html/functions_type_i.html @@ -0,0 +1,160 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_type_l.html b/docs/generated-html/functions_type_l.html new file mode 100644 index 0000000000..2e7334f0d3 --- /dev/null +++ b/docs/generated-html/functions_type_l.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_m.html b/docs/generated-html/functions_type_m.html new file mode 100644 index 0000000000..043340a51a --- /dev/null +++ b/docs/generated-html/functions_type_m.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_n.html b/docs/generated-html/functions_type_n.html new file mode 100644 index 0000000000..bb5ad36c7c --- /dev/null +++ b/docs/generated-html/functions_type_n.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_o.html b/docs/generated-html/functions_type_o.html new file mode 100644 index 0000000000..42ed281395 --- /dev/null +++ b/docs/generated-html/functions_type_o.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- o -

+
+ + + + diff --git a/docs/generated-html/functions_type_p.html b/docs/generated-html/functions_type_p.html new file mode 100644 index 0000000000..4e124be923 --- /dev/null +++ b/docs/generated-html/functions_type_p.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_type_s.html b/docs/generated-html/functions_type_s.html new file mode 100644 index 0000000000..2d67bf448e --- /dev/null +++ b/docs/generated-html/functions_type_s.html @@ -0,0 +1,278 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_type_t.html b/docs/generated-html/functions_type_t.html new file mode 100644 index 0000000000..54dffcad23 --- /dev/null +++ b/docs/generated-html/functions_type_t.html @@ -0,0 +1,227 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- t -

+
+ + + + diff --git a/docs/generated-html/functions_type_v.html b/docs/generated-html/functions_type_v.html new file mode 100644 index 0000000000..60177eeed6 --- /dev/null +++ b/docs/generated-html/functions_type_v.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_w.html b/docs/generated-html/functions_type_w.html new file mode 100644 index 0000000000..ba510b56ad --- /dev/null +++ b/docs/generated-html/functions_type_w.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_type_y.html b/docs/generated-html/functions_type_y.html new file mode 100644 index 0000000000..e20abf82a1 --- /dev/null +++ b/docs/generated-html/functions_type_y.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members - Typedefs + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_u.html b/docs/generated-html/functions_u.html new file mode 100644 index 0000000000..fc2ce25cc7 --- /dev/null +++ b/docs/generated-html/functions_u.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- u -

+
+ + + + diff --git a/docs/generated-html/functions_v.html b/docs/generated-html/functions_v.html new file mode 100644 index 0000000000..ebea1a15a8 --- /dev/null +++ b/docs/generated-html/functions_v.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- v -

+
+ + + + diff --git a/docs/generated-html/functions_vars.html b/docs/generated-html/functions_vars.html new file mode 100644 index 0000000000..43affc3190 --- /dev/null +++ b/docs/generated-html/functions_vars.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_b.html b/docs/generated-html/functions_vars_b.html new file mode 100644 index 0000000000..4b145d14ef --- /dev/null +++ b/docs/generated-html/functions_vars_b.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_c.html b/docs/generated-html/functions_vars_c.html new file mode 100644 index 0000000000..5e5a2251e0 --- /dev/null +++ b/docs/generated-html/functions_vars_c.html @@ -0,0 +1,89 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_d.html b/docs/generated-html/functions_vars_d.html new file mode 100644 index 0000000000..df6d2e3e95 --- /dev/null +++ b/docs/generated-html/functions_vars_d.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_e.html b/docs/generated-html/functions_vars_e.html new file mode 100644 index 0000000000..f98d4a1b6c --- /dev/null +++ b/docs/generated-html/functions_vars_e.html @@ -0,0 +1,87 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_f.html b/docs/generated-html/functions_vars_f.html new file mode 100644 index 0000000000..71f57760c2 --- /dev/null +++ b/docs/generated-html/functions_vars_f.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_g.html b/docs/generated-html/functions_vars_g.html new file mode 100644 index 0000000000..712c6b5da9 --- /dev/null +++ b/docs/generated-html/functions_vars_g.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_i.html b/docs/generated-html/functions_vars_i.html new file mode 100644 index 0000000000..e0bfcd7846 --- /dev/null +++ b/docs/generated-html/functions_vars_i.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- i -

+
+ + + + diff --git a/docs/generated-html/functions_vars_k.html b/docs/generated-html/functions_vars_k.html new file mode 100644 index 0000000000..04e3c5d992 --- /dev/null +++ b/docs/generated-html/functions_vars_k.html @@ -0,0 +1,327 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- k -

+
+ + + + diff --git a/docs/generated-html/functions_vars_l.html b/docs/generated-html/functions_vars_l.html new file mode 100644 index 0000000000..78a99ee379 --- /dev/null +++ b/docs/generated-html/functions_vars_l.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_m.html b/docs/generated-html/functions_vars_m.html new file mode 100644 index 0000000000..d97d3377e2 --- /dev/null +++ b/docs/generated-html/functions_vars_m.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_n.html b/docs/generated-html/functions_vars_n.html new file mode 100644 index 0000000000..60139fe65e --- /dev/null +++ b/docs/generated-html/functions_vars_n.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_p.html b/docs/generated-html/functions_vars_p.html new file mode 100644 index 0000000000..41f8498f9f --- /dev/null +++ b/docs/generated-html/functions_vars_p.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- p -

+
+ + + + diff --git a/docs/generated-html/functions_vars_r.html b/docs/generated-html/functions_vars_r.html new file mode 100644 index 0000000000..034ca3db1e --- /dev/null +++ b/docs/generated-html/functions_vars_r.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_s.html b/docs/generated-html/functions_vars_s.html new file mode 100644 index 0000000000..52934eee7f --- /dev/null +++ b/docs/generated-html/functions_vars_s.html @@ -0,0 +1,144 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+  + +

- s -

+
+ + + + diff --git a/docs/generated-html/functions_vars_t.html b/docs/generated-html/functions_vars_t.html new file mode 100644 index 0000000000..4fdfb30331 --- /dev/null +++ b/docs/generated-html/functions_vars_t.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_vars_v.html b/docs/generated-html/functions_vars_v.html new file mode 100644 index 0000000000..f773cd011f --- /dev/null +++ b/docs/generated-html/functions_vars_v.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Class Members - Variables + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_w.html b/docs/generated-html/functions_w.html new file mode 100644 index 0000000000..9ef19c96f8 --- /dev/null +++ b/docs/generated-html/functions_w.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ + + + + + diff --git a/docs/generated-html/functions_y.html b/docs/generated-html/functions_y.html new file mode 100644 index 0000000000..a97be07e0d --- /dev/null +++ b/docs/generated-html/functions_y.html @@ -0,0 +1,86 @@ + + + + + + + +Cutlass: Class Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all class members with links to the classes they belong to:
+ +

- y -

+
+ + + + diff --git a/docs/generated-html/gemm_8h.html b/docs/generated-html/gemm_8h.html new file mode 100644 index 0000000000..9996508bc6 --- /dev/null +++ b/docs/generated-html/gemm_8h.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: gemm.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm.h File Reference
+
+
+ +

Implements a software-pipelined efficient GEMM. +More...

+
#include <cuda.h>
+#include <cutlass/coord.h>
+#include <cutlass/util/platform.h>
+
+

Go to the source code of this file.

+ + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmDesc< Scalar_, Index_ >
 
struct  cutlass::gemm::Gemm< GemmTraits_ >
 
struct  cutlass::gemm::Gemm< GemmTraits_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + +

+Functions

template<typename Gemm_ >
__global__ void cutlass::gemm::gemm_kernel (typename Gemm_::Params params)
 
+
+ + + + diff --git a/docs/generated-html/gemm_8h_source.html b/docs/generated-html/gemm_8h_source.html new file mode 100644 index 0000000000..216e9b5abc --- /dev/null +++ b/docs/generated-html/gemm_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #if !defined(__CUDACC_RTC__)
31 #include <cuda.h>
32 #endif
33 
34 #include <cutlass/coord.h>
35 #include <cutlass/util/platform.h>
36 
37 namespace cutlass {
38 namespace gemm {
39 
41 
42 template <typename Gemm_>
43 __global__ void gemm_kernel(typename Gemm_::Params params) {
44  // Declare shared memory.
45  __shared__ typename Gemm_::SharedStorage shared_storage;
46 
47  // Construct the GEMM object.
48  Gemm_ gemm(params, shared_storage);
49  // Run GEMM.
50  gemm.multiply_add();
51 }
52 
54 
55 template <typename Scalar_, typename Index_ = int>
56 struct GemmDesc {
58  Index_ m, n, k;
60  Scalar_ alpha, beta;
62  void const* d_a;
64  Index_ lda;
66  void const* d_b;
68  Index_ ldb;
70  void const* d_c;
72  Index_ ldc;
74  void* d_d;
76  Index_ ldd;
77 };
78 
80 
81 template <typename GemmTraits_>
82 struct Gemm {
86  typedef GemmTraits_ Traits;
88  typedef typename Traits::SharedStorage SharedStorage;
89 
91  typedef typename Traits::ScalarA ScalarA;
93  typedef typename Traits::ScalarB ScalarB;
95  typedef typename Traits::Epilogue::Scalar ScalarEpilogue;
97  typedef typename Traits::Epilogue::ScalarC ScalarC;
99  typedef typename Traits::Epilogue::ScalarD ScalarD;
101  typedef typename Traits::Index Index;
102 
104  static int const kThreads = Traits::GemmConfig::kThreads;
105 
107  struct Params : public Traits::Params {
109  Index n,
110  Index k,
111  ScalarEpilogue alpha,
112  ScalarA const* d_a,
113  Index lda,
114  ScalarB const* d_b,
115  Index ldb,
116  ScalarEpilogue beta,
117  ScalarC const* d_c,
118  Index ldc,
119  ScalarD* d_d,
120  Index ldd) {
122  desc.m = m;
123  desc.n = n;
124  desc.k = k;
125  desc.alpha = alpha;
126  desc.beta = beta;
127  desc.d_a = reinterpret_cast<void const*>(d_a);
128  desc.lda = lda;
129  desc.d_b = reinterpret_cast<void const*>(d_b);
130  desc.ldb = ldb;
131  desc.d_c = reinterpret_cast<void const*>(d_c);
132  desc.ldc = ldc;
133  desc.d_d = reinterpret_cast<void*>(d_d);
134  desc.ldd = ldd;
135  return Traits::Params::initialize(desc);
136  }
137  };
138 
139 #if !defined(__CUDACC_RTC__)
140  static __host__ cudaError_t launch(Params const& params,
142  cudaStream_t stream = cudaStreamDefault) {
143  // Setup the grid.
144  dim3 grid;
145  grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW;
146  grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH;
147 
148  // The number of threads.
149  dim3 block;
150  block.x = kThreads;
151 
152  // Launch the kernel.
153  void const* params_ = reinterpret_cast<void const*>(&params);
154 
155  return cudaLaunchKernel(reinterpret_cast<void*>(&gemm_kernel<This_>),
156  grid,
157  block,
158  const_cast<void**>(&params_),
159  0,
160  stream);
161  }
162 
164  static __host__ cudaError_t launch(CUfunction kernel,
165  Params const& params,
166  CUstream stream = CU_STREAM_LEGACY) {
167  // Setup the grid.
168  dim3 grid;
169  grid.x = (params.m + Traits::OutputTile::kW - 1) / Traits::OutputTile::kW;
170  grid.y = (params.n + Traits::OutputTile::kH - 1) / Traits::OutputTile::kH;
171 
172  // The number of threads.
173  dim3 block;
174  block.x = kThreads;
175 
176  // Launch the kernel.
177  void* params_[] = {const_cast<void*>(reinterpret_cast<void const*>(&params))};
178 
179  // return cudaLaunchKernel(reinterpret_cast<void*>(&gemm_kernel<This_>), grid, block,
180  // const_cast<void**>(&params_), 0, stream);
181  CUresult result = cuLaunchKernel(
182  kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z, 0, stream, params_, 0);
183 
184  if (result != CUDA_SUCCESS) {
185  return cudaErrorLaunchFailure;
186  }
187  return cudaSuccess;
188  }
189 
190 #endif
191 
193  CUTLASS_DEVICE Gemm(Params const& params_, SharedStorage& shared_storage_)
194  : params(params_), shared_storage(shared_storage_) {}
195 
197  CUTLASS_DEVICE void multiply_add() {
198  // Swizzle the IDs of the block (to enable better cache behavior).
199  typename Traits::BlockSwizzle block_swizzle;
200  dim3 block = block_swizzle.swizzle();
201 
202  // Scale the id.
203  block.x *= Traits::OutputTile::kW;
204  block.y *= Traits::OutputTile::kH;
205 
206  // We may want to use shared memory to clear the registers.
207  typedef typename Traits::ClearAccumulators ClearAccumulators;
208 
209  // The streams to read A/B from global memory to shared memory.
210  typename Traits::GlobalLoadStream global_stream(params, shared_storage, block);
211 
212  // Create the accumulator clear.
213  ClearAccumulators clear(shared_storage.main_loop.clear);
214 
216  typedef typename Traits::MultiplyAdd MultiplyAdd;
217 
218  // By how much we unroll the main loop.
219  Index const kUnroll = static_cast<Index>(MultiplyAdd::AccumulatorsPerWarp::kD);
220 
221  // If we do not have enough steps in the main loop, trigger the residue code.
222  if (params.k < kUnroll) {
223  global_stream.residue(params.k, true);
224  }
225 
226  // Fetch the fragments for A and B from global memory.
227  global_stream.copy();
228 
229  // Copy the elements to shared memory (after transformation if needed).
230  global_stream.commit();
231 
232  // Make sure the data is in shared memory.
233  Traits::shared_store_fence(false);
234 
235  // The unrolling steps for the main loop.
236  int const kUnrollingSteps =
237  MultiplyAdd::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD;
238 
239  // Make sure we have at least 2 unrolling steps or our pipelining is not going to work.
240  static_assert(kUnrollingSteps >= 2, "The pipelining assumes at least two steps");
241 
242  // The stream of data from shared memory to fragments.
243  typename Traits::SharedLoadStream shared_load_stream(params, shared_storage);
244 
245  // Trigger the copy from shared memory for the 1st stream.
246  shared_load_stream.copy(0);
247 
248  // Allocate the accumulators.
249  typename MultiplyAdd::Accumulators accumulators;
250  // Clear the accumulators.
251  clear.clear(accumulators);
252 
253  // Enter the main loop and iterate.
254  typedef typename Traits::Index Index;
255  for (Index outer_k = params.k - kUnroll; outer_k > -kUnroll; outer_k -= kUnroll) {
256  // If that's the last "load iteration" update the predicates.
257  int const is_residue = outer_k <= kUnroll;
258  if (is_residue) {
259  global_stream.residue(outer_k);
260  }
261 
262  // Load data for the next iteration of the main loop.
263  global_stream.copy();
264 
266  for (int step = 0; step < kUnrollingSteps - 1; ++step) {
267  // Trigger the copy from shared memory for the next A/B values.
268  shared_load_stream.copy(step + 1);
269  // Make sure the values are available for the current iteration to do the multiply-add.
270  shared_load_stream.commit(step);
271 
272  // Do the math on the fragments of the current iteration.
273  MultiplyAdd multiply_add;
274  multiply_add.multiply_add(shared_load_stream.fragment_a(step),
275  shared_load_stream.fragment_b(step),
276  accumulators,
277  accumulators);
278  }
279 
280  // Make sure the data from shared memory has been entirely consumed.
281  Traits::shared_load_fence(true);
282 
283  // Commit the data in shared memory for A/B.
284  global_stream.commit();
285 
286  // Make sure the data is in shared memory.
287  Traits::shared_store_fence(true);
288 
289  // Move to the next stage for the load (if it makes sense).
290  shared_load_stream.inc_stage();
291  // Trigger the copy from shared memory for the next loop iteration.
292  shared_load_stream.copy(0);
293  // Make sure the values are available for the current iteration to do the multiply-add.
294  shared_load_stream.commit(kUnrollingSteps - 1);
295 
296  // Do the math on the fragments of the current iteration.
297  MultiplyAdd multiply_add;
298  multiply_add.multiply_add(shared_load_stream.fragment_a(kUnrollingSteps - 1),
299  shared_load_stream.fragment_b(kUnrollingSteps - 1),
300  accumulators,
301  accumulators);
302  }
303 
304  // Epilogue.
305  typedef typename Traits::Epilogue Epilogue;
306  Epilogue epilogue(params.epilogue, shared_storage.epilogue, params.m, params.n);
307  epilogue.epilogue(cutlass::make_Coord(0, block.y, block.x), accumulators);
308  }
309 
311  Params const& params;
314 };
315 
317 
318 } // namespace gemm
319 } // namespace cutlass
Definition: gemm.h:56
+
Definition: convert.h:33
+
SharedStorage & shared_storage
The shared storage.
Definition: gemm.h:313
+
Traits::Epilogue::ScalarD ScalarD
The scalar for D.
Definition: gemm.h:99
+
Scalar_ beta
Definition: gemm.h:60
+
Index_ k
Definition: gemm.h:58
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm.h:88
+
The params.
Definition: gemm.h:107
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Params const & params
The params.
Definition: gemm.h:311
+
Index_ m
The dimensions of the GEMM.
Definition: gemm.h:58
+
Traits::Epilogue::ScalarC ScalarC
The scalar for C.
Definition: gemm.h:97
+
Index_ ldb
The stride for B.
Definition: gemm.h:68
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
CUTLASS_DEVICE void multiply_add()
Do the GEMM.
Definition: gemm.h:197
+
GemmTraits_ Traits
The traits.
Definition: gemm.h:86
+
Traits::Epilogue::Scalar ScalarEpilogue
The scalar in the epilogue.
Definition: gemm.h:95
+
Index_ n
Definition: gemm.h:58
+
Traits::ScalarB ScalarB
The scalar for B.
Definition: gemm.h:93
+
Definition: clear_accumulators.h:38
+
void * d_d
The destination matrix D.
Definition: gemm.h:74
+
Definition: gemm.h:82
+
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:60
+
static __host__ cudaError_t launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)
Launch the kernel.
Definition: gemm.h:164
+
void const * d_a
The source matrix A.
Definition: gemm.h:62
+
__global__ void gemm_kernel(typename Gemm_::Params params)
Definition: gemm.h:43
+
CUTLASS_HOST_DEVICE int initialize(Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd)
Definition: gemm.h:108
+
Index_ lda
The stride for A.
Definition: gemm.h:64
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
Gemm< GemmTraits_ > This_
This class.
Definition: gemm.h:84
+
Index_ ldc
The stride for C.
Definition: gemm.h:72
+
CUTLASS_DEVICE Gemm(Params const &params_, SharedStorage &shared_storage_)
Ctor.
Definition: gemm.h:193
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Index_ ldd
The stride for D.
Definition: gemm.h:76
+
Traits::ScalarA ScalarA
The scalar for A.
Definition: gemm.h:91
+
CUTLASS_DEVICE void clear(Fragment_ &fragment)
Clear the fragment.
Definition: clear_accumulators.h:47
+
static int const kThreads
The number of threads.
Definition: gemm.h:104
+
Scalar_ alpha
The alpha/beta scaling values.
Definition: gemm.h:60
+
void const * d_c
The source matrix C.
Definition: gemm.h:70
+
static __host__ cudaError_t launch(Params const &params, cudaStream_t stream=cudaStreamDefault)
Launch the kernel.
Definition: gemm.h:141
+
Traits::Index Index
The index.
Definition: gemm.h:101
+
void const * d_b
The source matrix B.
Definition: gemm.h:66
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue_8h.html b/docs/generated-html/gemm__epilogue_8h.html new file mode 100644 index 0000000000..ad65bf56a9 --- /dev/null +++ b/docs/generated-html/gemm__epilogue_8h.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: gemm_epilogue.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_epilogue.h File Reference
+
+
+ +

Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the computed matrix product. +More...

+
#include <cutlass/convert.h>
+#include <cutlass/coord.h>
+#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + + + +

+Functions

template<typename T >
CUTLASS_DEVICE bool cutlass::gemm::is_zero (T x)
 
CUTLASS_DEVICE bool cutlass::gemm::is_zero (half x)
 
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue_8h_source.html b/docs/generated-html/gemm__epilogue_8h_source.html new file mode 100644 index 0000000000..c7be2683bd --- /dev/null +++ b/docs/generated-html/gemm__epilogue_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm_epilogue.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_epilogue.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
33 #include <cutlass/coord.h>
34 #include <cutlass/fragment.h>
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
41 template <typename T>
42 CUTLASS_DEVICE bool is_zero(T x) {
43  return x == T(0);
44 }
45 
46 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
47 CUTLASS_DEVICE bool is_zero(half x) { return reinterpret_cast<int16_t&>(x) == int16_t(0); }
48 #endif
49 
51 
52 template <typename GemmEpilogueTraits_>
53 struct GemmEpilogue {
55  typedef GemmEpilogueTraits_ Traits;
57  typedef typename Traits::Params Params;
59  typedef typename Traits::SharedStorage SharedStorage;
60 
62  typedef typename Traits::OutputTile OutputTile;
64  typedef typename Traits::Iterations Iterations;
66  typedef typename Traits::Accumulators Accumulators;
68  typedef typename Traits::Scalar Scalar;
70  typedef typename Traits::Functor Functor;
71 
73  static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes");
74 
76  typedef typename Traits::GlobalLoadIteratorC GlobalLoadIteratorC;
78  typedef typename Traits::GlobalTransformerC GlobalTransformerC;
80  typedef typename Traits::GlobalTransformerD GlobalTransformerD;
82  typedef typename Traits::GlobalStoreIteratorD GlobalStoreIteratorD;
84  typedef typename Traits::SharedStoreIteratorD SharedStoreIteratorD;
86  typedef typename Traits::SharedStoreTransformerD SharedStoreTransformerD;
88  typedef typename Traits::SharedLoadIteratorD SharedLoadIteratorD;
91 
93  typedef typename Traits::Index Index;
94 
96  typedef typename GlobalLoadIteratorC::Scalar ScalarC;
98  typedef typename GlobalStoreIteratorD::Scalar ScalarD;
99 
101  CUTLASS_DEVICE GemmEpilogue(Params const& params_,
102  SharedStorage& shared_storage_,
103  Index m_,
104  Index n_)
105  : params(params_), shared_storage(shared_storage_), m(m_), n(n_) {}
106 
108  CUTLASS_DEVICE void epilogue(Coord<3> const& block, Accumulators& accumulators) {
109  if (is_zero(params.functor.beta)) {
110  epilogue_with_or_without_beta<true>(block, accumulators);
111  } else {
112  epilogue_with_or_without_beta<false>(block, accumulators);
113  }
114  }
115 
116  template <bool kBetaIsZero_>
117  CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord<3> const& block,
118  Accumulators& accumulators) {
119 
120  Coord<3> const bounds = cutlass::make_Coord(0, n, m);
121 
122  // The functor.
123  Functor functor(params.functor);
124  // The C fragment.
125  typename GlobalLoadIteratorC::Fragment fragment_c;
126  // The transformed C fragment.
127  typename GlobalTransformerC::OutputFragment transformed_c;
128 
130  for (int h = 0; h < Iterations::kH; ++h) {
131  // Compute pointer and predicate offsets for C and D global iterators.
132  int const pointer_offset =
133  ((params.iterator_d.inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) +
134  params.iterator_d.inc_advance) *
135  Iterations::kW +
136  params.stride_h) *
137  h;
138  int const predicate_offset =
139  ((params.iterator_d.predicate_inc_h * (GlobalStoreIteratorD::Iterations::kH - 1) +
140  params.iterator_d.predicate_inc_advance) *
141  Iterations::kW +
142  Traits::Delta::kH) *
143  h;
144 
145  // The iterator to load the elements of the C matrix.
146  GlobalLoadIteratorC global_load_iterator(
147  params.iterator_c, bounds, block, pointer_offset, predicate_offset);
148  // The transformer for C.
149  GlobalTransformerC transformer_c;
150  // The transformer for D.
151  GlobalTransformerD transformer_d;
152  // The iterator to store into the D matrix.
153  GlobalStoreIteratorD global_store_iterator(
154  params.iterator_d, bounds, block, pointer_offset, predicate_offset);
155 
157  for (int w = 0; w < Iterations::kW; ++w) {
158  // Load the C matrix into fragment.
159  if (!kBetaIsZero_) {
160  iterator_load(global_load_iterator, fragment_c);
161  }
162 
163  // Make sure we can write to shared memory.
165 
166  // Copy the accumulators to shared memory.
167  int const offset = (h * Iterations::kW + w) * SharedStoreIteratorD::Fragment::kElements;
168 
169  SharedStoreTransformerD shared_store_transformer;
170  typename SharedStoreTransformerD::OutputFragment shared_store_transformed_d;
171  shared_store_transformer.transform(accumulators, offset, shared_store_transformed_d);
172 
173  SharedStoreIteratorD shared_store_iterator(params.shared_store_iterator_d,
174  shared_storage.shared_stream.store);
175  shared_iterator_store(shared_store_iterator, shared_store_transformed_d);
176 
177  // Make sure the data is in shared memory.
179 
180  // Copy the accumulators back to registers from shared memory.
181  SharedLoadIteratorD shared_load_iterator(params.shared_load_iterator_d,
182  shared_storage.shared_stream.load);
183  typename SharedLoadIteratorD::Fragment fetched_d;
184  shared_iterator_load(shared_load_iterator, fetched_d);
185 
186  // Do the math.
187  typename GlobalTransformerD::InputFragment fragment_d;
188 
189  if (kBetaIsZero_) {
190  functor.evaluate(fetched_d, fragment_d);
191  } else {
192  // Transform C fragment.
193  transformer_c.transform(fragment_c, transformed_c);
194  // Do the math.
195  functor.evaluate(fetched_d, transformed_c, fragment_d);
196  }
197 
198  // Transform D fragment.
199  typename GlobalTransformerD::OutputFragment transformed_d;
200  transformer_d.transform(fragment_d, transformed_d);
201 
202  // Copy the results to global memory.
203  iterator_store(global_store_iterator, transformed_d);
204  }
205  }
206  }
207 
209  CUTLASS_DEVICE void shared_load_fence() { __syncthreads(); }
210 
212  CUTLASS_DEVICE void shared_store_fence() { __syncthreads(); }
213 
215  Params const& params;
220 };
221 
223 
224 } // namespace gemm
225 } // namespace cutlass
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue.h:98
+
Traits::SharedStoreIteratorD SharedStoreIteratorD
The iterator to store D in shared memory.
Definition: gemm_epilogue.h:84
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
Traits::Params Params
The params.
Definition: gemm_epilogue.h:57
+
Definition: gemm_epilogue.h:53
+
CUTLASS_DEVICE void epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)
Definition: gemm_epilogue.h:117
+
CUTLASS_DEVICE GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
Ctor.
Definition: gemm_epilogue.h:101
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Definition: convert.h:69
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm_epilogue.h:59
+
Traits::GlobalTransformerD GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue.h:80
+
Traits::OutputTile OutputTile
The output tile.
Definition: gemm_epilogue.h:62
+
Traits::Accumulators Accumulators
The accumulators.
Definition: gemm_epilogue.h:66
+
#define CUTLASS_PRAGMA_UNROLL
Definition: cutlass.h:60
+
CUTLASS_DEVICE void shared_load_fence()
The memory fence for shared loads.
Definition: gemm_epilogue.h:209
+
SharedStorage & shared_storage
The shared storage.
Definition: gemm_epilogue.h:217
+
GemmEpilogueTraits_ Traits
The traits class.
Definition: gemm_epilogue.h:55
+
CUTLASS_DEVICE bool is_zero(T x)
Definition: gemm_epilogue.h:42
+
Params const & params
The params.
Definition: gemm_epilogue.h:215
+
Traits::SharedLoadIteratorD SharedLoadIteratorD
The iterator to load D in shared memory.
Definition: gemm_epilogue.h:88
+
Traits::Index Index
The index.
Definition: gemm_epilogue.h:93
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Traits::SharedStoreTransformerD SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue.h:86
+
CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment)
Stores a fragment to a shared memory output iterator.
Definition: iterator_access.h:228
+
Traits::GlobalStoreIteratorD GlobalStoreIteratorD
The iterator for D in global memory.
Definition: gemm_epilogue.h:82
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue.h:96
+
Index n
Definition: gemm_epilogue.h:219
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
Traits::Functor Functor
The functor in charge of the math.
Definition: gemm_epilogue.h:70
+
Traits::Iterations Iterations
The number of iterations.
Definition: gemm_epilogue.h:64
+
CUTLASS_DEVICE void epilogue(Coord< 3 > const &block, Accumulators &accumulators)
Execute the epilogue.
Definition: gemm_epilogue.h:108
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
The shared load transformer for D.
Definition: gemm_epilogue.h:90
+
Traits::Scalar Scalar
The scalar.
Definition: gemm_epilogue.h:68
+
Defines conversion operations among Fragments of different base type.
+
Index m
The dimensions of the GEMM.
Definition: gemm_epilogue.h:219
+
CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: gemm_epilogue.h:212
+
Traits::GlobalTransformerC GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue.h:78
+
Traits::GlobalLoadIteratorC GlobalLoadIteratorC
The iterator for C in global memory.
Definition: gemm_epilogue.h:73
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue__traits_8h.html b/docs/generated-html/gemm__epilogue__traits_8h.html new file mode 100644 index 0000000000..cdb9863633 --- /dev/null +++ b/docs/generated-html/gemm__epilogue__traits_8h.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: gemm_epilogue_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_epilogue_traits.h File Reference
+
+
+ +

Defines structural properties of the GEMM epilogue. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
 
struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
 The params. More...
 
union  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage
 The shared memory storage to exchange data. More...
 
struct  cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage
 The shared memory to swizzle the data in the epilogue. More...
 
struct  cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
 
struct  cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__epilogue__traits_8h_source.html b/docs/generated-html/gemm__epilogue__traits_8h_source.html new file mode 100644 index 0000000000..3e10e801ba --- /dev/null +++ b/docs/generated-html/gemm__epilogue__traits_8h_source.html @@ -0,0 +1,160 @@ + + + + + + + +Cutlass: gemm_epilogue_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_epilogue_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
31 #include <cutlass/coord.h>
35 #include <cutlass/reshape_tile.h>
36 #include <cutlass/tile_iterator.h>
37 
38 namespace cutlass {
39 namespace gemm {
40 
42 
43 template <
45  typename OutputTile_,
47  typename Accumulators_,
49  typename GlobalLoadIteratorC_,
51  typename GlobalTransformerC_,
53  typename GlobalTransformerD_,
55  typename GlobalStoreIteratorD_,
57  typename SharedStoreIteratorD_,
59  typename SharedStoreTransformerD_,
61  typename SharedLoadIteratorD_,
63  typename Iterations_,
65  typename Delta_,
67  typename Functor_,
69  typename Index_ = int>
71  //
73  typedef OutputTile_ OutputTile;
76  typedef Accumulators_ Accumulators;
78  typedef GlobalLoadIteratorC_ GlobalLoadIteratorC;
80  typedef GlobalTransformerC_ GlobalTransformerC;
82  typedef GlobalTransformerD_ GlobalTransformerD;
84  typedef GlobalStoreIteratorD_ GlobalStoreIteratorD;
86  typedef SharedStoreIteratorD_ SharedStoreIteratorD;
88  typedef SharedStoreTransformerD_ SharedStoreTransformerD;
90  typedef SharedLoadIteratorD_ SharedLoadIteratorD;
92  typedef Iterations_ Iterations;
94  typedef Delta_ Delta;
95 
97  typedef Functor_ Functor;
99  typedef Index_ Index;
100 
102  static_assert(Iterations::kD == 1 && Iterations::kC == 1, "Unsupported 3D/4D shapes");
103 
105  typedef typename Functor::Scalar Scalar;
107  typedef typename GlobalLoadIteratorC::Scalar ScalarC;
109  typedef typename GlobalStoreIteratorD::Scalar ScalarD;
110 
112  struct Params {
116  typename GlobalLoadIteratorC::Params iterator_c;
118  typename GlobalStoreIteratorD::Params iterator_d;
120  typename SharedStoreIteratorD::Params shared_store_iterator_d;
122  typename SharedLoadIteratorD::Params shared_load_iterator_d;
124  typename Functor::Params functor;
125 
127  template <typename GemmDesc_>
128  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
129  // The parameters for the functor.
130  int error_code = functor.initialize(desc);
131  if (error_code) {
132  return error_code;
133  }
134 
135  // At the end of the H iteration, we jump over a number of columns.
136  this->stride_h = desc.ldd * Delta::kH;
137  // Nothing to do here.
138  this->stride_w = 0;
139 
140  // Setup the params for the global memory iterator for C.
141  error_code = iterator_c.initialize(
142  reinterpret_cast<ScalarC const*>(desc.d_c), desc.ldc, desc.n, stride_w, Delta::kW);
143  if (error_code) {
144  return error_code;
145  }
146 
147  // Setup the params for the global memory iterator for D.
148  return iterator_d.initialize(
149  reinterpret_cast<ScalarD*>(desc.d_d), desc.ldd, desc.n, stride_w, Delta::kW);
150  }
151  };
152 
155  // The storage for the store iterator.
156  typename SharedStoreIteratorD::SharedStorage store;
157  // The storage for the store iterator.
158  typename SharedLoadIteratorD::SharedStorage load;
159  };
160 
162  struct SharedStorage {
163  // The storage for the shared stream D.
165  };
166 };
167 
169 
170 template <typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
173  typedef typename EpilogueFunctor_::Scalar Scalar;
175  typedef typename GemmConfig_::OutputTile OutputTile;
176 
178  typedef Shape<1,
179  GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH /
180  GemmConfig_::kAccumulatorsPerLdsB,
181  GemmConfig_::kAccumulatorsPerLdsB>
183  // The iteration strides in the H/W dimension.
184  typedef Shape<0,
185  GemmConfig_::kAccumulatorsPerLdsB*(
186  GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1),
187  0>
190  typedef EpilogueFunctor_ Functor;
191 
194  // The pointer is float.
195  typename Functor::Scalar,
196  // The output tile size.
197  typename GemmConfig_::OutputTile,
198  // The number of warps.
199  typename GemmConfig_::Warps,
200  // The number of threads per warp.
201  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
202  // The number of scalars per STS.
203  GemmConfig_::kScalarsPerStsD,
204  // The skew -- 128 / sizeof(ScalarD) / kScalarsPerStsD is the number of threads involved in
205  // a single STS. We divide by 2 as our objective is to add a skew to the odd threads to
206  // avoid bank conflicts between odd and even threads.
207  128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 *
208  GemmConfig_::kScalarsPerStsD>
210 
217 
220 
223  // The pointer is float.
224  typename Functor::Scalar,
225  // The output tile size.
226  typename GemmConfig_::OutputTile,
227  // The number of warps.
228  typename GemmConfig_::Warps,
229  // The number of threads per warp.
230  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
231  // The number of columns of the output tile written by iteration.
232  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
233  // The number of scalars per LDS.
234  GemmConfig_::kScalarsPerLdsD,
235  // The skew.
238 
245 
247  typedef GemmGlobalTileCdTraits<
248  // The pointer is float const.
249  typename GemmConfig_::ScalarC const,
250  // The tile has size (N / Iterations)xM in GEMM's terminology.
251  Shape<1,
252  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
253  GemmConfig_::OutputTile::kW>,
254  // The threads are distributed as warps x 32 (the traits may reorganize).
256  // How many elements do we jump over at each iteration?
258  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
259  GemmConfig_::kScalarsPerLdgC>
261 
266 
268  typedef GemmGlobalTileCdTraits<
269  // The pointer is float.
270  typename GemmConfig_::ScalarD,
271  // The tile has size (N / Iterations)xM in GEMM's terminology.
272  Shape<1,
273  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
274  GemmConfig_::OutputTile::kW>,
275  // The threads are distributed as warps x 32 (the traits may reorganize).
277  // How many elements do we jump over at each iteration?
279  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
280  GemmConfig_::kScalarsPerStgD>
282 
287 };
288 
290 
291 template <
293  typename GemmConfig_,
295  typename EpilogueFunctor_,
297  typename Index_ = int,
301  // The output tile.
302  typename GemmConfig_::OutputTile,
303  // The accumulators.
304  typename GemmConfig_::Accumulators,
305  // The global iterator for C.
306  typename Helper_::GlobalLoadIteratorC,
307  // The transformer for C.
308  typename Helper_::GlobalTransformerC,
309  // The transformer for D.
310  typename Helper_::GlobalTransformerD,
311  // The global iterator for D.
312  typename Helper_::GlobalStoreIteratorD,
313  // The iterator to store D to shared memory.
314  typename Helper_::SharedStoreIteratorD,
315  // The shared store transformer for D.
316  typename Helper_::SharedStoreTransformerD,
317  // The iterator to load D from shared memory.
318  typename Helper_::SharedLoadIteratorD,
319  // The number of iterations.
320  typename Helper_::Iterations,
321  // The strides between iterations.
322  typename Helper_::Delta,
323  // The functor to be used in the epilogue.
324  EpilogueFunctor_,
325  // The index.
326  Index_> {};
327 
329 
330 } // namespace gemm
331 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
SharedLoadIteratorD::SharedStorage load
Definition: gemm_epilogue_traits.h:158
+
Delta_ Delta
The iterations strides.
Definition: gemm_epilogue_traits.h:94
+
Definition: load_store.h:42
+
Definition: convert.h:33
+
GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > GlobalLoadTileTraits
The traits class to build the iterator to load data from global memory for C^N.
Definition: gemm_epilogue_traits.h:260
+
Definition: gemm_epilogue_traits.h:171
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
GlobalTransformerC_ GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue_traits.h:80
+
GlobalStoreIteratorD::Params iterator_d
The params for the D global iterator.
Definition: gemm_epilogue_traits.h:118
+
Implements the BLAS linear scaling function alpha*AB + beta*C.
+
The shared memory storage to exchange data.
Definition: gemm_epilogue_traits.h:154
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue_traits.h:107
+
EpilogueFunctor_::Scalar Scalar
The scalar.
Definition: gemm_epilogue_traits.h:173
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
Index stride_h
The strides for H and W in the different iterations of the epilogue.
Definition: gemm_epilogue_traits.h:114
+
Index_ Index
The index.
Definition: gemm_epilogue_traits.h:99
+
GlobalStoreIteratorD_ GlobalStoreIteratorD
The iterator for D in global memory.
Definition: gemm_epilogue_traits.h:84
+
Definition: convert.h:69
+
OutputTile_ OutputTile
The output tile.
Definition: gemm_epilogue_traits.h:73
+
GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
The iterator to load C.
Definition: gemm_epilogue_traits.h:263
+
Definition: tile_iterator.h:62
+
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue_traits.h:109
+
TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
The iterator to store D to shared memory.
Definition: gemm_epilogue_traits.h:216
+
GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
The iterator to store D.
Definition: gemm_epilogue_traits.h:284
+
Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue_traits.h:219
+
Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > Iterations
The number of iterations in the epilogue.
Definition: gemm_epilogue_traits.h:182
+
GlobalLoadIteratorC::Params iterator_c
The params for the C iterator.
Definition: gemm_epilogue_traits.h:116
+
SharedStoreTransformerD_ SharedStoreTransformerD
The shared store transformer for D.
Definition: gemm_epilogue_traits.h:88
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Setup the params.
Definition: gemm_epilogue_traits.h:128
+
GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > GlobalStoreTileTraits
The traits class to build the iterator to store data to global memory for D^N.
Definition: gemm_epilogue_traits.h:281
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
SharedStoreIteratorD_ SharedStoreIteratorD
The iterator to store D in shared memory.
Definition: gemm_epilogue_traits.h:86
+
SharedStoreIteratorD::SharedStorage store
Definition: gemm_epilogue_traits.h:156
+
GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > SharedStoreTileTraits
The traits class to build the iterator to store to shared memory for D.
Definition: gemm_epilogue_traits.h:209
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:278
+
Defines a type for restructuring a tile.
+
Iterations_ Iterations
The number of iterations.
Definition: gemm_epilogue_traits.h:92
+
Definition: gemm_shared_tile.h:335
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
StreamSharedStorage shared_stream
Definition: gemm_epilogue_traits.h:164
+
EpilogueFunctor_ Functor
The functor to do the math in the epilogue.
Definition: gemm_epilogue_traits.h:190
+
TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
The iterator to load D from shared memory.
Definition: gemm_epilogue_traits.h:244
+
GemmConfig_::OutputTile OutputTile
The output tile.
Definition: gemm_epilogue_traits.h:175
+
GlobalLoadIteratorC_ GlobalLoadIteratorC
The iterator for C in global memory.
Definition: gemm_epilogue_traits.h:78
+
GlobalTransformerD_ GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue_traits.h:82
+
Definition: gemm_epilogue_traits.h:70
+
Definition: gemm_global_tile.h:348
+
Index stride_w
Definition: gemm_epilogue_traits.h:114
+
static int const kW
The width of the cube.
Definition: shape.h:70
+
Functor::Scalar Scalar
The scalar.
Definition: gemm_epilogue_traits.h:102
+
Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
The transformer for D.
Definition: gemm_epilogue_traits.h:286
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
The params.
Definition: gemm_epilogue_traits.h:112
+
The shared memory to swizzle the data in the epilogue.
Definition: gemm_epilogue_traits.h:162
+
SharedLoadIteratorD_ SharedLoadIteratorD
The iterator to load D from shared memory.
Definition: gemm_epilogue_traits.h:90
+
Functor::Params functor
The functor params.
Definition: gemm_epilogue_traits.h:124
+
Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
The transformer for C.
Definition: gemm_epilogue_traits.h:265
+
SharedLoadIteratorD::Params shared_load_iterator_d
The params for the D shared load iterator.
Definition: gemm_epilogue_traits.h:122
+
GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for D.
Definition: gemm_epilogue_traits.h:237
+
Accumulators_ Accumulators
Definition: gemm_epilogue_traits.h:76
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Defines conversion operations among Fragments of different base type.
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
SharedStoreIteratorD::Params shared_store_iterator_d
The params for the D shared store iterator.
Definition: gemm_epilogue_traits.h:120
+
Functor_ Functor
The functor in charge of the math.
Definition: gemm_epilogue_traits.h:97
+
Definition: gemm_shared_tile.h:264
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
+ + + + diff --git a/docs/generated-html/gemm__fragment__stream_8h.html b/docs/generated-html/gemm__fragment__stream_8h.html new file mode 100644 index 0000000000..6c4bbdec3c --- /dev/null +++ b/docs/generated-html/gemm__fragment__stream_8h.html @@ -0,0 +1,119 @@ + + + + + + + +Cutlass: gemm_fragment_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_fragment_stream.h File Reference
+
+
+ +

GEMM Fragment Stream maps the dimensions of the GEMM problem to the generic fragment stream. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
 Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions. More...
 
struct  cutlass::gemm::GemmFragmentStream< Traits_ >
 GEMM Fragment Stream. More...
 
struct  cutlass::gemm::GemmFragmentStream< Traits_ >::Params
 Parameters object. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__fragment__stream_8h_source.html b/docs/generated-html/gemm__fragment__stream_8h_source.html new file mode 100644 index 0000000000..db383ae4ef --- /dev/null +++ b/docs/generated-html/gemm__fragment__stream_8h_source.html @@ -0,0 +1,148 @@ + + + + + + + +Cutlass: gemm_fragment_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_fragment_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 #include <cutlass/matrix_traits.h>
32 
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
42 template <GemmOperand::Kind Usage, typename Scalar_, MatrixLayout::Kind Layout,
43  typename ThreadBlockTile_, int Threads, int ScalarsPerInst, typename Index_ = int,
44  typename DestinationSkew_ = Shape<0, 0, 0, 0> >
47  static GemmOperand::Kind const kUsage = Usage;
48 
50  typedef Scalar_ Scalar;
51 
53  static MatrixLayout::Kind const kLayout = Layout;
54 
56  typedef ThreadBlockTile_ ThreadBlockTile;
57 
59  static int const kThreads = Threads;
60 
62  static int const kAccessSize = ScalarsPerInst;
63 
65  typedef Index_ Index;
66 
68  typedef typename ShapeDiv<DestinationSkew_, Shape<ScalarsPerInst, ScalarsPerInst, ScalarsPerInst,
70 
73 
76 
79 
81  typedef TileTraitsDefault<VectorizedTile, kThreads> TileTraits;
82 
84  typedef FragmentStream<
85  TileTraits,
94 };
95 
97 template <typename Traits_>
99  : public FragmentStream<
100  typename Traits_::TileTraits,
101  TileLoadIterator<typename Traits_::TileTraits, typename Traits_::Scalar,
102  Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
103  : IteratorAdvance::kW,
104  MemorySpace::kGlobal, typename Traits_::Index>,
105  TileStoreIterator<typename Traits_::TileTraits, typename Traits_::Scalar,
106  Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
107  : IteratorAdvance::kW,
108  MemorySpace::kShared, typename Traits_::Index, typename Traits_::Scalar,
109  IteratorFragment::kScalar, typename Traits_::DestinationSkew> > {
111  typedef Traits_ Traits;
112 
114  typedef typename Traits::FragmentStream Base;
115 
116  //
117  // FragmentStream concept
118  //
119 
121  typedef typename Traits::Scalar Scalar;
122 
124  typedef typename Base::LoadIterator LoadIterator;
125 
127  typedef typename Base::StoreIterator StoreIterator;
128 
130  typedef typename Base::Convert Convert;
131 
133  typedef typename Base::Fragment Fragment;
134 
136  typedef typename Base::StoreFragment StoreFragment;
137 
139  typedef typename Base::Storage Storage;
140 
141  // Parameters type
142  // typedef typename Base::Params BaseParams;
143 
145  typedef typename Traits::Index Index;
146 
147  //
148  // Nested class definitions
149  //
150 
152  typedef typename Traits::Scalar const *Pointer;
153 
155  struct Params : public Base::Params {
156  //
157  // Methods
158  //
159 
161  template <typename GemmDesc_>
162  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc,
163  typename Traits::Scalar const *pointer, Index ldm) {
164  return this->load_params.initialize(pointer, ldm * Traits::MultiplicandTraits::Shape::kH, ldm,
165  Traits::kAccessSize);
166  }
167  };
168 
169  //
170  // Static member functions
171  //
172 
174  static CUTLASS_DEVICE void shared_store_fence() { Base::shared_store_fence(); }
175 
176  //
177  // Methods
178  //
179 
180  CUTLASS_DEVICE
182 
184  CUTLASS_DEVICE
185  GemmFragmentStream(Params const &params, Coord<3> const &bounds,
186  Coord<3> const &block_offset = make_Coord(0, 0, 0))
187  : Base(params, ProjectOperand<Traits::kUsage, Traits::MultiplicandTraits::kKstrided>::project(
188  bounds) +
189  make_Coord(1, 0, 0),
190  ProjectOperand<Traits::kUsage, Traits::MultiplicandTraits::kKstrided>::project(
191  block_offset)) {}
192 
194  CUTLASS_DEVICE
195  void load() { Base::load(); }
196 
198  CUTLASS_DEVICE
199  void commit() { Base::commit(); }
200 
202  CUTLASS_DEVICE
203  void residue(Coord<3> const &bounds, Coord<3> const &block_offset) {
204  this->initialize_predicates(bounds, block_offset);
205 
206  this->fetch.clear();
207  }
208 
210  CUTLASS_DEVICE
211  void initialize_predicates(Coord<3> const &bounds, Coord<3> const &block_offset) {
212  Base::initialize_predicates(
214  make_Coord(1, 0, 0),
216  block_offset));
217  }
218 };
219 
221 }
222 }
nv_std::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand.
Definition: gemm_operand.h:86
+
static bool const kKstrided
Definition: gemm_operand.h:81
+
Scalar_ Scalar
Scalar data type.
Definition: gemm_fragment_stream.h:50
+
GemmMultiplicandTraits< ThreadBlockTile, kUsage, kLayout > MultiplicandTraits
Traits of multiplicand.
Definition: gemm_fragment_stream.h:72
+
static int const kAccessSize
Scalars per instruction.
Definition: gemm_fragment_stream.h:62
+
Definition: load_store.h:42
+
Definition: convert.h:34
+
Base::StoreIterator StoreIterator
Defines the store iterator.
Definition: gemm_fragment_stream.h:127
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)
Initializes parameters.
Definition: gemm_fragment_stream.h:162
+
Defines structural properties of complete GEMM computation.
+
Traits::FragmentStream Base
Base class.
Definition: gemm_fragment_stream.h:114
+
An abstraction for implementing a stream loading a tile and storing a tile using a pair of tile itera...
+
Definition: load_store.h:43
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
ReshapeTile< ScalarTile, kAccessSize >::Tile VectorizedTile
Reshape for vectorized access.
Definition: gemm_fragment_stream.h:78
+
Traits::Index Index
Index type.
Definition: gemm_fragment_stream.h:145
+
FragmentStream< TileTraits, TileLoadIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index >, TileStoreIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew > > FragmentStream
Define the tile stream.
Definition: gemm_fragment_stream.h:93
+
Traits_ Traits
Traits.
Definition: gemm_fragment_stream.h:111
+ +
Definition: tile_iterator.h:97
+
CUTLASS_DEVICE GemmFragmentStream()
Definition: gemm_fragment_stream.h:181
+
TileTraitsDefault< VectorizedTile, kThreads > TileTraits
Define structure of stripmined tile.
Definition: gemm_fragment_stream.h:81
+
MultiplicandTraits::Shape ScalarTile
Scalar tile shape.
Definition: gemm_fragment_stream.h:75
+
static CUTLASS_DEVICE void shared_store_fence()
The memory fence for shared stores.
Definition: gemm_fragment_stream.h:174
+
Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions.
Definition: gemm_fragment_stream.h:45
+
CUTLASS_DEVICE GemmFragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
Constructor - bounds and block offset are aligned to GEMM coordinates (K, N, M)
Definition: gemm_fragment_stream.h:185
+
Base::Fragment Fragment
Loaded fragment type.
Definition: gemm_fragment_stream.h:133
+
GEMM Fragment Stream.
Definition: gemm_fragment_stream.h:98
+
Traits::Scalar const * Pointer
The pointer.
Definition: gemm_fragment_stream.h:152
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:308
+
CUTLASS_DEVICE void commit()
Commits the fragment.
Definition: gemm_fragment_stream.h:199
+
Base::Storage Storage
Destination storage.
Definition: gemm_fragment_stream.h:139
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Manages a pair of iterators to stream data from global memory to shared.
Definition: fragment_stream.h:50
+
Definition: gemm_operand.h:66
+
static MatrixLayout::Kind const kLayout
Layout of the operand.
Definition: gemm_fragment_stream.h:53
+
Traits::Scalar Scalar
Scalar type.
Definition: gemm_fragment_stream.h:121
+
Parameters object.
Definition: gemm_fragment_stream.h:155
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:37
+
Index_ Index
Index type.
Definition: gemm_fragment_stream.h:65
+
Definition: shape.h:124
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:63
+
Base::Convert Convert
Converts between tiles.
Definition: gemm_fragment_stream.h:130
+
Definition: gemm_operand.h:94
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
static int const kThreads
Number of threads.
Definition: gemm_fragment_stream.h:59
+
Kind
Definition: matrix_traits.h:36
+
Base::StoreFragment StoreFragment
Stored fragment type.
Definition: gemm_fragment_stream.h:136
+
Base::LoadIterator LoadIterator
Defines the load iterator.
Definition: gemm_fragment_stream.h:124
+
static GemmOperand::Kind const kUsage
Indicates identity of multiplicand.
Definition: gemm_fragment_stream.h:47
+
Tile_ Tile
Definition: tile.h:43
+
Definition: tile_iterator.h:97
+
ShapeDiv< DestinationSkew_, Shape< ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1 > >::Shape DestinationSkew
Skew added to shared memory tile.
Definition: gemm_fragment_stream.h:69
+
Kind
Definition: matrix_traits.h:43
+
CUTLASS_DEVICE void residue(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
TODO - Recomputes predicates and clears fetch registers.
Definition: gemm_fragment_stream.h:203
+
ThreadBlockTile_ ThreadBlockTile
Shape of the thread block tile (K, N, M)
Definition: gemm_fragment_stream.h:56
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
CUTLASS_DEVICE void initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
Recomputes predicates aligned to GEMM coordinates (K, N, M)
Definition: gemm_fragment_stream.h:211
+
Definition: tile_iterator.h:102
+
CUTLASS_DEVICE void load()
Loads the fragment.
Definition: gemm_fragment_stream.h:195
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:556
+
+ + + + diff --git a/docs/generated-html/gemm__global__stream_8h.html b/docs/generated-html/gemm__global__stream_8h.html new file mode 100644 index 0000000000..544d0f8b3e --- /dev/null +++ b/docs/generated-html/gemm__global__stream_8h.html @@ -0,0 +1,119 @@ + + + + + + + +Cutlass: gemm_global_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_global_stream.h File Reference
+
+
+ +

Implements efficient loading of the thread block-level tile from global memory and storing to shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
 
struct  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params
 The params. More...
 
union  cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage
 The storage in shared memory needed by that stream. More...
 
struct  cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__global__stream_8h_source.html b/docs/generated-html/gemm__global__stream_8h_source.html new file mode 100644 index 0000000000..4eff93c20c --- /dev/null +++ b/docs/generated-html/gemm__global__stream_8h_source.html @@ -0,0 +1,130 @@ + + + + + + + +Cutlass: gemm_global_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_global_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
35 
36 namespace cutlass {
37 namespace gemm {
38 
40 
41 template <
43  typename LoadIterator_,
45  typename StoreIterator_,
47  typename Transformer_>
48 
51  typedef LoadIterator_ LoadIterator;
53  typedef Transformer_ Transformer;
55  typedef StoreIterator_ StoreIterator;
56 
58  typedef typename LoadIterator::Fragment FetchedFragment;
60  typedef typename Transformer::OutputFragment TransformedFragment;
63  "");
68  "");
69 
71  static MatrixLayout::Kind const kLayout = LoadIterator::kLayout;
73  typedef typename LoadIterator::Scalar Scalar;
75  typedef typename LoadIterator::Pointer Pointer;
77  typedef typename LoadIterator::Index Index;
78 
80  struct Params {
81  // The load iterator.
82  typename LoadIterator::Params load_iterator;
83  // The store iterator.
84  typename StoreIterator::Params store_iterator;
85 
88  int error_code = load_iterator.initialize(pointer, ld);
89  if (error_code) {
90  return error_code;
91  }
92 
93  return store_iterator.initialize();
94  }
95  };
96 
98  typedef typename StoreIterator::SharedStorage SharedStoreStorage;
99 
102  // The load iterator.
103  typename LoadIterator::SharedStorage load_iterator;
104  // The store iterator.
106  };
107 
109  CUTLASS_DEVICE GlobalLoadStreamBase(Params const& params,
110  SharedStorage& shared_storage,
111  Coord<3> const bounds,
112  Coord<3> const& block)
113  : load_iterator(params.load_iterator, bounds, block),
114  transformer(),
115  store_iterator(params.store_iterator, shared_storage.store_iterator)
116 
117  {
118  fetched_fragment.clear();
119  }
120 
122  CUTLASS_DEVICE void copy() { iterator_load(load_iterator, fetched_fragment); }
123 
125  CUTLASS_DEVICE void commit() {
128  store_iterator.inc_stage();
129  }
130 
132  CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) {
133  load_iterator.residue(k);
134  if (!skip_clear) {
135  fetched_fragment.clear();
136  }
137  }
138 
149 };
150 
152 
153 template <
155  typename LoadIterator_,
157  typename StoreIterator_,
159  typename Transformer_ = Copy<typename LoadIterator_::Fragment> >
160 
161 struct GlobalLoadStream : public GlobalLoadStreamBase<LoadIterator_, StoreIterator_, Transformer_> {
164 
166  CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const& params,
167  typename Base::SharedStorage& shared_storage,
168  Coord<3> const& bounds,
169  Coord<3> const& block)
170  : Base(params, shared_storage, bounds, block) {}
171 };
172 
174 } // namespace gemm
175 } // namespace cutlass
static MatrixLayout::Kind const kLayout
The layout (taken from LoadIterator::kLayout).
Definition: gemm_global_stream.h:71
+
StoreIterator::Params store_iterator
Definition: gemm_global_stream.h:84
+
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Transformer_ Transformer
The transformer.
Definition: gemm_global_stream.h:53
+
StoreIterator_ StoreIterator
The store iterator to write to shared memory.
Definition: gemm_global_stream.h:55
+
std::is_same (false specialization)
Definition: platform.h:412
+
StoreIterator::SharedStorage SharedStoreStorage
The amount of storage in shared memory needed to store the tile.
Definition: gemm_global_stream.h:98
+
TransformedFragment Fragment
Make sure the fragments match.
Definition: gemm_global_stream.h:63
+
TransformedFragment transformed_fragment
The fragment that holds the transformed data after it has been fetched from global memory.
Definition: gemm_global_stream.h:146
+
CUTLASS_DEVICE void residue(Index k, bool skip_clear=false)
Execute the residue code.
Definition: gemm_global_stream.h:132
+
Definition: convert.h:69
+
CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld)
Setup the params.
Definition: gemm_global_stream.h:87
+
LoadIterator load_iterator
The iterator.
Definition: gemm_global_stream.h:140
+
LoadIterator::Params load_iterator
Definition: gemm_global_stream.h:82
+
Definition: gemm_global_stream.h:161
+
Free functions for loading and storing to implementations of tile iterator concepts.
+
LoadIterator::SharedStorage load_iterator
Definition: gemm_global_stream.h:103
+
CUTLASS_DEVICE GlobalLoadStream(typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block)
Ctor.
Definition: gemm_global_stream.h:166
+
Definition: gemm_global_stream.h:49
+
StoreIterator store_iterator
The store iterator.
Definition: gemm_global_stream.h:148
+
LoadIterator::Pointer Pointer
The pointer.
Definition: gemm_global_stream.h:75
+
SharedStoreStorage store_iterator
Definition: gemm_global_stream.h:105
+
Transformer::OutputFragment TransformedFragment
The fragment that is obtained after the transformation by the transformer.
Definition: gemm_global_stream.h:60
+
LoadIterator::Scalar Scalar
The scalar type of the iterator.
Definition: gemm_global_stream.h:73
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
LoadIterator::Index Index
The index.
Definition: gemm_global_stream.h:77
+
Transformer transformer
The transformer.
Definition: gemm_global_stream.h:144
+
GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Base
The base class.
Definition: gemm_global_stream.h:163
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
LoadIterator::Fragment FetchedFragment
The fragment that is copied from global memory.
Definition: gemm_global_stream.h:58
+
The storage in shared memory needed by that stream.
Definition: gemm_global_stream.h:101
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
FetchedFragment fetched_fragment
The fragment to fetch from global memory.
Definition: gemm_global_stream.h:142
+
Kind
Definition: matrix_traits.h:36
+
LoadIterator_ LoadIterator
The load iterator.
Definition: gemm_global_stream.h:51
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
CUTLASS_DEVICE void commit()
Commit the data.
Definition: gemm_global_stream.h:125
+
CUTLASS_DEVICE void copy()
Load the data from global memory to the fetch fragment.
Definition: gemm_global_stream.h:122
+
CUTLASS_DEVICE GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
Ctor.
Definition: gemm_global_stream.h:109
+
Defines conversion operations among Fragments of different base type.
+
The params.
Definition: gemm_global_stream.h:80
+
+ + + + diff --git a/docs/generated-html/gemm__global__tile_8h.html b/docs/generated-html/gemm__global__tile_8h.html new file mode 100644 index 0000000000..39d4a36125 --- /dev/null +++ b/docs/generated-html/gemm__global__tile_8h.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: gemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_global_tile.h File Reference
+
+
+ +

Defines iterators for efficiently loading and storing to global memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
 
struct  cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
 
struct  cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
 
struct  cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
 
struct  cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params
 
struct  cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
 
struct  cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__global__tile_8h_source.html b/docs/generated-html/gemm__global__tile_8h_source.html new file mode 100644 index 0000000000..d44c18ec4b --- /dev/null +++ b/docs/generated-html/gemm__global__tile_8h_source.html @@ -0,0 +1,215 @@ + + + + + + + +Cutlass: gemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/coord.h>
31 #include <cutlass/util/platform.h>
32 
34 #include <cutlass/matrix_traits.h>
36 #include <cutlass/reshape_tile.h>
37 #include <cutlass/tile_iterator.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 // The following functor reshapes a tile of threads to match a tile of data. The idea is that when
45 // the user wants to build the iterator traits, he/she may want to specify the tile independently
46 // from the number of scalars loaded/stored per instruction. For example, in the row-major version
47 // with a tile of size 128x8 - the user may want the iterator to work with 32x8 threads if
48 // each thread loads 1 scalar per LDG. If the user changes to 4 scalars per LDG, then the tile of
49 // threads has to change. The code below detects that and corrects the code automatically - it is
50 // a helper when the user does not specify the right configuration.
51 
52 template <typename Tile_, typename Threads_, bool = (Tile_::kW < Threads_::kW)>
53 struct ReshapeThreads {
54  typedef Threads_ Threads;
55 };
56 
57 template <typename Tile_, typename Threads_>
59  typedef Shape<Threads_::kD, Threads_::kH * Threads_::kW / Tile_::kW, Tile_::kW, 1> Threads;
60 };
61 
63 
64 template <GemmOperand::Kind kOperand_,
65  MatrixLayout::Kind kLayout_,
66  typename Scalar_,
67  typename Tile_,
68  typename Threads_,
69  int kAccessSize_>
72  static GemmOperand::Kind const kOperand = kOperand_;
74  static MatrixLayout::Kind const kLayout = kLayout_;
78  typedef Scalar_* Pointer;
80  static int const kAccessSize = kAccessSize_;
83 
90 
96  typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize>
98 
100 
102  struct ThreadOffset {
105  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
106  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
107 
108  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
109  }
110  };
111 };
112 
114 
115 template <typename Scalar_, typename Tile_, typename Threads_, int kStrideH_, int kAccessSize_>
116 struct GemmGlobalTileCdTraits : public GemmGlobalTileTraits<GemmOperand::kC,
117  MatrixLayout::kColumnMajor,
118  Scalar_,
119  Tile_,
120  Threads_,
121  kAccessSize_> {
125  Scalar_,
126  Tile_,
127  Threads_,
128  kAccessSize_>
130 
132  static int const kStrideH = kStrideH_;
135 
136  typedef typename Base::Iterations Iterations;
137 
138  typedef typename Base::Threads Threads;
139 
141 
143 
145  struct ThreadOffset {
148  int thread_offset_h = threadIdx.x / Threads::kW * kStrideH * Iterations::kH;
149  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
150 
151  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
152  }
153  };
154 };
155 
157 
158 template <typename TileTraits_, typename Index_ = int>
160  : public TileLoadIterator<TileTraits_,
161  typename TileTraits_::Scalar,
162  TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
163  : IteratorAdvance::kW,
164  MemorySpace::kGlobal,
165  Index_> {
168 
169  typedef TileLoadIterator<TileTraits_,
170  typename TileTraits_::Scalar,
171  TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH
174  Index_>
177  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
179  typedef typename Base::Fragment Fragment;
181  typedef typename TileTraits_::Scalar Scalar;
183  typedef typename TileTraits_::Threads Threads;
185  typedef Index_ Index;
187  typedef typename TileTraits_::ThreadOffset ThreadOffset;
190 
192 
194  typedef typename Base::Params BaseParams;
195 
196  struct Params : public BaseParams {
199  Index inc_d = 0;
200  Index inc_advance = 0;
201  // Move by some columns for each iteration in the H dimension.
202  Index inc_h = Base::Delta::kH * stride_h;
203 
204  // Move by some more columns in the number of iterations if the D dimension is > 1.
205  if (Base::Delta::kD > 0) {
206  inc_d = Base::Delta::kD * stride_h - (Base::Iterations::kH - 1) * inc_h;
207  }
208 
209  // Move to the beginning of the next iteration.
210  if (kAdvance == IteratorAdvance::kH && Base::Delta::kD > 0) {
211  inc_advance = inc_d;
212  } else if (kAdvance == IteratorAdvance::kH) {
213  inc_advance = inc_h;
214  } else if (Base::Delta::kD > 0) {
215  inc_advance = (Base::Iterations::kW + 0) * ShapeCount<typename Base::Delta>::kWc -
216  (Base::Iterations::kH - 1) * inc_h -
217  (Base::Iterations::kD - 1) * Base::Delta::kD * stride_h;
218  } else {
219  inc_advance = (Base::Iterations::kW + 0) * ShapeCount<typename Base::Delta>::kWc -
220  (Base::Iterations::kH - 1) * inc_h;
221  }
222 
224  return 0;
225  }
226  };
227 
232 
233  CUTLASS_DEVICE void initialize_predicates(const Coord<3>& bounds, const Coord<3>& block) {
234  // Setup the masks to control loads.
235  predicates.fill(0);
236 
237  int bounds_h, bounds_w;
238  if (kAdvance == IteratorAdvance::kH) {
239  bounds_w = bounds[2] - block[2];
240  bounds_h = bounds[1];
241 
242  } else {
243  bounds_w = bounds[1];
244  bounds_h = bounds[2] - block[1];
245  }
246 
247  // Fill in the bits of the predicate vector.
248  for (int d = 0; d < Base::Iterations::kD; ++d) {
249  for (int h = 0; h < Base::Iterations::kH; ++h) {
250  for (int w = 0; w < Base::Iterations::kW; ++w) {
251  for (int c = 0; c < Base::Iterations::kC; ++c) {
252  bool flag = w * Base::Delta::kW < bounds_w;
253  if (kAdvance == IteratorAdvance::kH) {
254  flag = flag && (h * Base::Delta::kH + d * Base::Delta::kD) < bounds_h;
255  } else {
256  flag = flag && (h * Base::Delta::kH) < bounds_h;
257  }
258  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
259  predicates.set(bit, flag);
260  }
261  }
262  }
263  }
264  }
265 
267  CUTLASS_DEVICE GemmGlobalIteratorAb(Params const& _params,
268  const Coord<3>& bounds,
269  const Coord<3>& block,
270  ThreadOffset thread_offset_func = ThreadOffset())
271  : params(_params) {
272  thread_offset = thread_offset_func();
273  // The column.
274  Index block_h = thread_offset[1];
275  // The contiguous dimension.
276  Index block_w = thread_offset[2];
277 
278  // Add the blocks indices.
279  if (kAdvance == IteratorAdvance::kH) {
280  block_h += block[1];
281  block_w += block[2];
282 
283  } else {
284  block_h += block[2];
285  block_w += block[1];
286  }
287 
288  // Setup the pointer.
289  params.pointer += (block_h * params.stride_h + block_w);
290 
291  // Initialize predicates
292  initialize_predicates(bounds, make_Coord(0, block_h, block_w));
293  }
294 
296  CUTLASS_DEVICE void inc_h() { params.pointer += params.inc_h; }
298  CUTLASS_DEVICE void inc_d() { params.pointer += params.inc_d; }
300  CUTLASS_DEVICE void inc_advance() { params.pointer += params.inc_advance; }
301 
304  Scalar const* data() const { return params.pointer; }
305 
307  CUTLASS_DEVICE void residue(Index k) {
308  // The coordinates of the thread.
309  Index block_h = thread_offset[1];
310  // The contiguous dimension.
311  Index block_w = thread_offset[2];
312 
313  // Update the predicate vector.
314  for (int d = 0; d < Base::Iterations::kD; ++d) {
315  for (int h = 0; h < Base::Iterations::kH; ++h) {
316  for (int w = 0; w < Base::Iterations::kW; ++w) {
317  for (int c = 0; c < Base::Iterations::kC; ++c) {
318  Index offset = 0;
319  if (kAdvance == IteratorAdvance::kH) {
320  offset += block_h + h * Base::Delta::kH + d * Base::Delta::kD;
321  } else {
322  offset += block_w + w * Base::Delta::kW;
323  }
324 
325  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
326  if (offset >= k) {
327  predicates.set(bit, false);
328  }
329  }
330  }
331  }
332  }
333  }
334 
336  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
337  int const bit = ComputeOffsetFromShape<typename Base::Iterations>::get(d, h, w, c);
338  return predicates[bit];
339  }
340 
343 };
344 
346 
347 template <typename TileTraits_, typename Index_ = int>
348 struct GemmGlobalIteratorCd : public TileIteratorBase<TileTraits_,
349  typename TileTraits_::Scalar,
350  IteratorAdvance::kH,
351  MemorySpace::kGlobal,
352  Index_> {
356  typedef TileIteratorBase<TileTraits_,
357  typename TileTraits_::Scalar,
360  Index_>
362 
364  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
365 
367  typedef typename TileTraits_::Scalar Scalar;
369  typedef typename TileTraits_::Pointer Pointer;
371  typedef typename TileTraits_::Threads Threads;
373  typedef Index_ Index;
375  typedef typename TileTraits_::ThreadOffset ThreadOffset;
376 
378  struct Params {
389 
392  Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w) {
393  // The pointer.
394  this->pointer = pointer;
395  // Each column of the matrix.
396  stride_h = TileTraits_::ThreadsDelta::kH * ld;
397  // Each thread output 1 column per iteration. The stride between columns is given by the
398  // number of scalars that are loaded per LDS for B.
399  inc_h = ld * TileTraits_::kStrideH;
400  inc_advance =
401  (ld - ld * TileTraits_::kStrideH * (Base::Iterations::kH - 1)) + epilogue_stride_w;
402 
403  predicate_offset = bound;
404  predicate_inc_h = TileTraits_::kStrideH;
406  -((TileTraits_::kStrideH * (Base::Iterations::kH - 1) - 1) + epilogue_delta_w);
407 
408  return 0;
409  }
410  };
411 
415 
417  CUTLASS_DEVICE GemmGlobalIteratorCd() {}
418 
420  CUTLASS_DEVICE GemmGlobalIteratorCd(Params const& params,
421  const Coord<3>& bounds,
422  const Coord<3>& block,
423  int offset = 0,
424  int pred_offset = 0,
425  ThreadOffset thread_offset_func = ThreadOffset())
426  : params(params) {
427  thread_offset = thread_offset_func();
428  // Each warp works on a different column of the tile.
429  int const h = thread_offset[1] + block[1];
430  // Each lane writes a different element.
431  int const w = thread_offset[2] + block[2];
432  // Setup the pointer.
433  this->params.pointer += ((h * params.stride_h + w) + offset);
434 
435  // Prepare the vector of predicates.
436  for (int i = 0; i < Base::Iterations::kW; ++i) {
437  predicates.set(i, w + i * Base::Delta::kW < bounds[2]);
438  }
439  this->params.predicate_offset -= (h + pred_offset);
440  }
441 
443  CUTLASS_DEVICE void inc_c() {}
445  CUTLASS_DEVICE void inc_w() {}
447  CUTLASS_DEVICE void inc_h() {
450  }
452  CUTLASS_DEVICE void inc_d() {}
454  CUTLASS_DEVICE void inc_advance() {
457  }
458 
460  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
461  return predicates.at(w) && params.predicate_offset > 0;
462  }
463 
466  Pointer data() { return params.pointer; }
467 
469  Pointer const data() const { return params.pointer; }
470 
473 };
474 
476 
477 } // namespace gemm
478 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_global_tile.h:92
+
Index inc_advance
The strides to increment the pointer.
Definition: gemm_global_tile.h:384
+
CUTLASS_DEVICE void inc_d()
Increment the pointer in the D dimension.
Definition: gemm_global_tile.h:452
+
Definition: convert.h:33
+
cutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount > PredicateVector
Definition: gemm_global_tile.h:191
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:177
+
T type
Definition: platform.h:369
+
Base::Params BaseParams
Iterator parameters type.
Definition: gemm_global_tile.h:194
+
Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_global_tile.h:97
+
Index_ Index
The index.
Definition: gemm_global_tile.h:373
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
GemmGlobalIteratorCd< TileTraits_, Index_ > This_
This class.
Definition: gemm_global_tile.h:354
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:364
+
Definition: gemm_global_tile.h:70
+
Scalar_ * Pointer
The pointer.
Definition: gemm_global_tile.h:78
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
Kind
Definition: tile_iterator.h:62
+
CUTLASS_HOST_DEVICE bool at(int idx) const
Accesses a bit within the predicate vector.
Definition: predicate_vector.h:356
+
Definition: load_store.h:43
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
Definition: gemm_global_tile.h:99
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_global_tile.h:82
+
TileIteratorBase< TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
The base class.
Definition: gemm_global_tile.h:361
+
Shape< 1, 1, Tile::kC > ThreadsDelta
The relative offset between two elements in the H/W dimension in adjacent threads.
Definition: gemm_global_tile.h:89
+
Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
Override the strides in each dimension between different loads/stores.
Definition: gemm_global_tile.h:134
+
Index predicate_inc_h
Definition: gemm_global_tile.h:386
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:166
+
CUTLASS_HOST_DEVICE Pointer const data() const
Definition: gemm_global_tile.h:469
+
CUTLASS_DEVICE void initialize_predicates(const Coord< 3 > &bounds, const Coord< 3 > &block)
Definition: gemm_global_tile.h:233
+
Definition: tile_iterator.h:62
+
static IteratorAdvance::Kind const kAdvance
Specifies in which dimension post-increment accesses advance.
Definition: tile_iterator.h:331
+
TileLoadIterator< TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > Base
The base class.
Definition: gemm_global_tile.h:175
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Is the iterator valid?
Definition: gemm_global_tile.h:336
+
Definition: gemm_global_tile.h:196
+
Definition: matrix_traits.h:43
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
Definition: gemm_global_tile.h:159
+
CUTLASS_DEVICE void inc_advance()
Increment the pointer to move to the next iteration.
Definition: gemm_global_tile.h:454
+
GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: gemm_global_tile.h:129
+
Kind
Definition: load_store.h:40
+
Index stride_h
Definition: tile_iterator.h:172
+
static IteratorAdvance::Kind const kAdvance
Specifies in which dimension post-increment accesses advance.
Definition: gemm_global_tile.h:189
+
TileTraits_::Threads Threads
The threads.
Definition: gemm_global_tile.h:183
+
CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index stride_h)
Initializes params to load a strip-mined tile, given pointer and stride_h.
Definition: gemm_global_tile.h:198
+
CUTLASS_HOST_DEVICE int initialize()
Definition: tile_iterator.h:425
+
static int const kStrideH
The stride in the H dimension.
Definition: gemm_global_tile.h:132
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
Shape< Threads_::kD, Threads_::kH *Threads_::kW/Tile_::kW, Tile_::kW, 1 > Threads
Definition: gemm_global_tile.h:59
+
Index predicate_inc_advance
The strides to increment the predicate offset.
Definition: gemm_global_tile.h:386
+
static GemmOperand::Kind const kOperand
Identity of the operand.
Definition: gemm_global_tile.h:72
+
Index inc_h
Definition: tile_iterator.h:176
+
Defines container classes and iterators for managing a statically sized vector of boolean predicates...
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+ +
PredicateVector predicates
The predicates.
Definition: gemm_global_tile.h:342
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_global_tile.h:76
+
CUTLASS_HOST_DEVICE Scalar const * data() const
Returns the current pointer.
Definition: gemm_global_tile.h:304
+
Defines a type for restructuring a tile.
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Base::Fragment Fragment
Fragment type loaded by the iterator.
Definition: gemm_global_tile.h:179
+
TileTraits_::Threads Threads
The threads.
Definition: gemm_global_tile.h:371
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_global_tile.h:147
+
CUTLASS_DEVICE void inc_h()
Increment the pointer in the H dimension.
Definition: gemm_global_tile.h:447
+
CUTLASS_DEVICE GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
Ctor.
Definition: gemm_global_tile.h:420
+
Definition: gemm_operand.h:67
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_global_tile.h:102
+
Index inc_advance
Definition: tile_iterator.h:179
+
CUTLASS_DEVICE void residue(Index k)
That&#39;s the residue! Update the predicates.
Definition: gemm_global_tile.h:307
+
CUTLASS_HOST_DEVICE void fill(bool value=true)
Fills all predicates with a given value.
Definition: predicate_vector.h:343
+
CUTLASS_DEVICE GemmGlobalIteratorAb(Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())
Ctor.
Definition: gemm_global_tile.h:267
+
CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w)
Setup the params.
Definition: gemm_global_tile.h:391
+
CUTLASS_DEVICE void inc_c()
Increment the pointer in the C dimension.
Definition: gemm_global_tile.h:443
+
CUTLASS_HOST_DEVICE Pointer data()
Returns the raw pointer.
Definition: gemm_global_tile.h:466
+
Scalar const * pointer
Pointer to memory.
Definition: tile_iterator.h:390
+
Base::Threads Threads
Definition: gemm_global_tile.h:138
+
Index stride_h
The stride in the H dimension to setup the thread in the block.
Definition: gemm_global_tile.h:382
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_global_tile.h:104
+
Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
Strides for immediate offset computation.
Definition: gemm_global_tile.h:94
+
Statically sized array of bits implementing a vector of boolean predicates.
Definition: predicate_vector.h:104
+
CUTLASS_DEVICE void inc_h()
Increment the pointer in the H dimension.
Definition: gemm_global_tile.h:296
+
TileTraits_::ThreadOffset ThreadOffset
The thread offset.
Definition: gemm_global_tile.h:375
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Base::ImmediateOffsetStrides ImmediateOffsetStrides
Definition: gemm_global_tile.h:142
+
TileTraits_::Scalar Scalar
The scalar.
Definition: gemm_global_tile.h:367
+
Index inc_h
Definition: gemm_global_tile.h:384
+
cutlass::PredicateVector< Base::Iterations::kW > predicates
The predicates for the row.
Definition: gemm_global_tile.h:472
+
CUTLASS_DEVICE void inc_d()
Increment the pointer in the D dimension.
Definition: gemm_global_tile.h:298
+
Pointer pointer
The pointer.
Definition: gemm_global_tile.h:380
+
GemmGlobalIteratorAb< TileTraits_, Index_ > This_
This class.
Definition: gemm_global_tile.h:167
+ +
ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
The tile shape.
Definition: gemm_global_tile.h:85
+
Base::Fragment Fragment
Fragment definition.
Definition: tile_iterator.h:364
+
Iterator for accessing a stripmined tile in memory.
Definition: tile_iterator.h:102
+
CUTLASS_DEVICE void inc_w()
Increment the pointer in the W dimension.
Definition: gemm_global_tile.h:445
+
Params params
Definition: gemm_global_tile.h:412
+
Definition: gemm_global_tile.h:348
+
Definition: matrix_traits.h:36
+
Coord< 4 > thread_offset
Offset of an individual lane from the start of the tile.
Definition: gemm_global_tile.h:414
+
TileTraits_::ThreadOffset ThreadOffset
The thread offset.
Definition: gemm_global_tile.h:187
+
static int const kW
The width of the cube.
Definition: shape.h:70
+
CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
Set a bit within the predicate vector.
Definition: predicate_vector.h:364
+
Parameters.
Definition: tile_iterator.h:388
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_global_tile.h:145
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Tile_ Tile
Definition: reshape_tile.h:43
+
Definition: tile_iterator.h:62
+
Base::Iterations Iterations
Definition: gemm_global_tile.h:136
+
Index_ Index
The index.
Definition: gemm_global_tile.h:185
+
TileTraits_::Pointer Pointer
The pointer.
Definition: gemm_global_tile.h:369
+
Kind
Definition: matrix_traits.h:43
+
TileTraits_::Scalar Scalar
The scalar.
Definition: gemm_global_tile.h:181
+
Threads_ Threads
Definition: gemm_global_tile.h:54
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
CUTLASS_DEVICE void inc_advance()
Increment the pointer to move to the next iteration.
Definition: gemm_global_tile.h:300
+
CUTLASS_DEVICE GemmGlobalIteratorCd()
Ctor.
Definition: gemm_global_tile.h:417
+
Params params
The parameters.
Definition: gemm_global_tile.h:231
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
The params.
Definition: gemm_global_tile.h:378
+
Base::ThreadsDelta ThreadsDelta
Definition: gemm_global_tile.h:140
+
CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
Test the validity of the iterator.
Definition: gemm_global_tile.h:460
+
Coord< 4 > thread_offset
Offset of an individual lane from the start of the tile.
Definition: gemm_global_tile.h:229
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Index predicate_offset
The column offset to compute the predicate for the columns.
Definition: gemm_global_tile.h:388
+
Index inc_d
Definition: tile_iterator.h:175
+
static MatrixLayout::Kind const kLayout
The layout.
Definition: gemm_global_tile.h:74
+
+ + + + diff --git a/docs/generated-html/gemm__operand_8h.html b/docs/generated-html/gemm__operand_8h.html new file mode 100644 index 0000000000..17d7ebd303 --- /dev/null +++ b/docs/generated-html/gemm__operand_8h.html @@ -0,0 +1,134 @@ + + + + + + + +Cutlass: gemm_operand.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_operand.h File Reference
+
+
+ +

Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >
 Helper to describe attributes of GEMM matrix operands. More...
 
struct  cutlass::gemm::GetExtent< kOperand_, Tile_ >
 
struct  cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
 
struct  cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
 
struct  cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
 
struct  cutlass::gemm::ProjectOperand< operand, Kstrided >
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >
 Project A operand - (0, K, M) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >
 Project B operand - (0, K, N) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kC, true >
 Project C operand - (0, N, M) More...
 
struct  cutlass::gemm::ProjectOperand< GemmOperand::kD, true >
 Project D operand - (0, N, M) More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__operand_8h_source.html b/docs/generated-html/gemm__operand_8h_source.html new file mode 100644 index 0000000000..83b58f2b67 --- /dev/null +++ b/docs/generated-html/gemm__operand_8h_source.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: gemm_operand.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_operand.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/matrix_traits.h>
32 #include <cutlass/reshape_tile.h>
33 #include <cutlass/util/platform.h>
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_>
43  static const bool Congruous =
44  (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor);
45 };
46 
48 
49 template <typename GemmOperand::Kind kOperand_, typename Tile_>
50 struct GetExtent;
51 
52 template <typename Tile_>
53 struct GetExtent<GemmOperand::kA, Tile_> {
54  static const int kExtent = Tile_::kW;
55 };
56 
57 template <typename Tile_>
58 struct GetExtent<GemmOperand::kB, Tile_> {
59  static const int kExtent = Tile_::kH;
60 };
61 
63 
66 template <typename ThreadBlockTile_, GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
68  // Only defined for A or B
69  static_assert(Usage == GemmOperand::kA || Usage == GemmOperand::kB,
70  "MultiplicandTileShape defined only for A or B operands.");
71 
73  typedef ThreadBlockTile_ ThreadBlockTile;
74 
76  static GemmOperand::Kind const kUsage = Usage;
77 
79  static MatrixLayout::Kind const kLayout = Layout;
80 
81  // True if K is the strided dimension
83 
85  typedef typename platform::conditional<
86  kKstrided,
89 };
90 
92 
95 template <GemmOperand::Kind operand, bool Kstrided = true>
97 
99 template <bool Kstrided>
100 struct ProjectOperand<GemmOperand::kA, Kstrided> {
102  static Coord<3> project(Coord<3> const &coord) {
103  if (Kstrided) {
104  return make_Coord(0, coord[0], coord[2]);
105  } else {
106  return make_Coord(0, coord[2], coord[0]);
107  }
108  }
109 };
110 
112 template <bool Kstrided>
113 struct ProjectOperand<GemmOperand::kB, Kstrided> {
115  static Coord<3> project(Coord<3> const &coord) {
116  if (Kstrided) {
117  return make_Coord(0, coord[0], coord[1]);
118  } else {
119  return make_Coord(0, coord[1], coord[0]);
120  }
121  }
122 };
123 
125 template <>
126 struct ProjectOperand<GemmOperand::kC, true> {
128  static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); }
129 };
130 
132 template <>
133 struct ProjectOperand<GemmOperand::kD, true> {
135  static Coord<3> project(Coord<3> const &coord) { return make_Coord(0, coord[1], coord[2]); }
136 };
137 
139 
140 } // namespace gemm
141 } // namespace cutlass
static bool const kKstrided
Definition: gemm_operand.h:82
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:115
+
Definition: convert.h:33
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:128
+
C++ features that may be otherwise unimplemented for CUDA device functions.
+
ThreadBlockTile_ ThreadBlockTile
Shape of GEMM thread block tile (K, N, M)
Definition: gemm_operand.h:70
+
platform::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand.
Definition: gemm_operand.h:88
+
Definition: matrix_traits.h:36
+
Defines a type for restructuring a tile.
+
Definition: gemm_operand.h:67
+
static const bool Congruous
Definition: gemm_operand.h:43
+
Definition: matrix_traits.h:43
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
std::conditional (true specialization)
Definition: platform.h:343
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
static MatrixLayout::Kind const kLayout
Layout of tile.
Definition: gemm_operand.h:79
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:102
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_operand.h:96
+
Definition: gemm_operand.h:50
+
Statically-sized array specifying Coords within a tensor.
Definition: coord.h:48
+
Gemm operand - D = A * B + C.
Definition: matrix_traits.h:42
+
Kind
Definition: matrix_traits.h:36
+
static CUTLASS_HOST_DEVICE Coord< 3 > project(Coord< 3 > const &coord)
Definition: gemm_operand.h:135
+
Kind
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
static GemmOperand::Kind const kUsage
Identifies multiplicand.
Definition: gemm_operand.h:76
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Helper to describe attributes of GEMM matrix operands.
Definition: gemm_operand.h:42
+
+ + + + diff --git a/docs/generated-html/gemm__shared__stream_8h.html b/docs/generated-html/gemm__shared__stream_8h.html new file mode 100644 index 0000000000..314df24b74 --- /dev/null +++ b/docs/generated-html/gemm__shared__stream_8h.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: gemm_shared_stream.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_shared_stream.h File Reference
+
+
+ +

Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEMM pipeline. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
 
struct  cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params
 The params. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__shared__stream_8h_source.html b/docs/generated-html/gemm__shared__stream_8h_source.html new file mode 100644 index 0000000000..8a0ec9da8d --- /dev/null +++ b/docs/generated-html/gemm__shared__stream_8h_source.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: gemm_shared_stream.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_shared_stream.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
32 
33 namespace cutlass {
34 namespace gemm {
35 
37 
38 template <
40  typename Iterator_,
42  typename Transformer_ = Copy<typename Iterator_::Fragment> >
43 
46  typedef Iterator_ Iterator;
48  typedef Transformer_ Transformer;
49 
51  typedef typename Iterator::Fragment FetchedFragment;
53  typedef typename Transformer::OutputFragment TransformedFragment;
56  "");
59 
61  struct Params {
63  typename Iterator::Params iterator;
64 
66  CUTLASS_HOST_DEVICE int initialize() { return iterator.initialize(); }
67  };
68 
70  typedef typename Iterator::Storage SharedStorage;
71 
73  CUTLASS_DEVICE SharedLoadStream() {}
74 
76  CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage) {
77  this->initialize(params, shared_storage);
78  }
79 
81  CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage) {
82  // The iterator.
83  iterator = Iterator(params.iterator, shared_storage);
84  // The transformer.
86  }
87 
89  CUTLASS_DEVICE void copy(FetchedFragment &fetched) { shared_iterator_load(iterator, fetched); }
90 
92  CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched) {
93  shared_iterator_load(iterator, fetched, d);
94  }
95 
97  CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed) {
98  transformer.transform(fetched, transformed);
99  }
100 
102  CUTLASS_DEVICE void inc_stage() { iterator.inc_stage(); }
103 
108 };
109 
111 
112 } // namespace gemm
113 } // namespace cutlass
CUTLASS_DEVICE void copy(FetchedFragment &fetched)
Load the data from shared memory to the fetch fragment.
Definition: gemm_shared_stream.h:89
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
CUTLASS_DEVICE void initialize(Params const &params, SharedStorage &shared_storage)
Initialize the stream.
Definition: gemm_shared_stream.h:81
+
std::is_same (false specialization)
Definition: platform.h:412
+
Iterator::Storage SharedStorage
The storage in shared memory needed by that stream.
Definition: gemm_shared_stream.h:70
+
CUTLASS_DEVICE void commit(FetchedFragment &fetched, TransformedFragment &transformed)
Commit the data.
Definition: gemm_shared_stream.h:97
+
CUTLASS_DEVICE void inc_stage()
Increment the stage.
Definition: gemm_shared_stream.h:102
+
CUTLASS_DEVICE SharedLoadStream()
Ctor.
Definition: gemm_shared_stream.h:73
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: gemm_shared_stream.h:44
+
Transformer transformer
The transformer.
Definition: gemm_shared_stream.h:107
+
TransformedFragment Fragment
Make sure the fragments match.
Definition: gemm_shared_stream.h:56
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void copy(int d, FetchedFragment &fetched)
Load the data from shared memory to the fetch fragment.
Definition: gemm_shared_stream.h:92
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Iterator::Params iterator
The iterator params.
Definition: gemm_shared_stream.h:63
+
Iterator iterator
The iterator.
Definition: gemm_shared_stream.h:105
+
CUTLASS_HOST_DEVICE int initialize()
Setup the params.
Definition: gemm_shared_stream.h:66
+
Transformer::OutputFragment TransformedFragment
The fragment that is obtained after the transformation by the transformer.
Definition: gemm_shared_stream.h:53
+
The params.
Definition: gemm_shared_stream.h:61
+
Iterator::Fragment FetchedFragment
The fragment that is copied from shared memory.
Definition: gemm_shared_stream.h:51
+
Transformer_ Transformer
The transformer.
Definition: gemm_shared_stream.h:48
+
Iterator_ Iterator
The load iterator.
Definition: gemm_shared_stream.h:46
+
CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage)
Ctor.
Definition: gemm_shared_stream.h:76
+
+ + + + diff --git a/docs/generated-html/gemm__shared__tile_8h.html b/docs/generated-html/gemm__shared__tile_8h.html new file mode 100644 index 0000000000..cf63242a39 --- /dev/null +++ b/docs/generated-html/gemm__shared__tile_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: gemm_shared_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_shared_tile.h File Reference
+
+
+ +

Defines iterators for efficiently loading and storing tiles to and from shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
 
struct  cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
 
struct  cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 
struct  cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
struct  cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
 
struct  cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__shared__tile_8h_source.html b/docs/generated-html/gemm__shared__tile_8h_source.html new file mode 100644 index 0000000000..7fe9d1ffff --- /dev/null +++ b/docs/generated-html/gemm__shared__tile_8h_source.html @@ -0,0 +1,214 @@ + + + + + + + +Cutlass: gemm_shared_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_shared_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerSts_>
42  typedef Scalar_* Pointer;
46  typedef Threads_ Threads;
48  typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> ThreadsStrides;
50  static int const kSkew = 0;
52  static int const kAccessSize = kScalarsPerSts_;
55 
57  typedef Shape<1,
58  Tile::kH / Threads::kH,
59  Tile::kW / Threads::kW,
60  Tile::kC / Threads::kC / kAccessSize>
67 
68  struct ThreadOffset {
70  Coord<4> operator()() const {
72  return make_Coord(0, 0, offset, 0);
73  }
74  };
75 };
76 
78 
79 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerSts_, int kSkew_>
84  typedef Scalar_* Pointer;
89  kScalarsPerSts_>::Tile Tile;
91  typedef Threads_ Threads;
93  static int const kSkew = kSkew_;
95  static int const kAccessSize = kScalarsPerSts_;
98 
100  typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> Iterations;
105 
106  struct ThreadOffset {
110  return make_Coord(0, 0, offset, 0);
111  }
112  };
113 
114  protected:
117 };
118 
120 
121 template <typename Scalar_,
122  typename OutputTile_,
123  typename Warps_,
124  typename ThreadsPerWarp_,
125  typename InstructionShape_,
126  int kStages_,
127  int kScalarsPerLds_,
128  int kSkew_ = 0>
134  typedef Scalar_* Pointer;
136  typedef Shape<kStages_,
137  OutputTile_::kD / InstructionShape_::kD,
138  GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD>
147  typedef Warps_ Warps;
149  typedef ThreadsPerWarp_ ThreadsPerWarp;
151  // static int const kScalarsPerLds = kScalarsPerLds_;
152  static int const kAccessSize = kScalarsPerLds_;
154  static int const kSkew = kSkew_;
157 
162 
164  typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kScalarsPerLds*/>
171 
173  struct ThreadOffset {
176  // Extract the warp.
177  int const warp = threadIdx.x / kWarpSize % Warps::kW;
178  // Compute the row offset for each thread
179  int const lane = (threadIdx.x & 0x0e) / 2;
180  // The offset.
181  int const offset = (warp * ThreadsPerWarp::kW + lane) * kAccessSize;
182 
183  return make_Coord(0, 0, offset, 0);
184  }
185  };
186 };
187 
189 
190 template <typename Scalar_,
191  typename OutputTile_,
192  typename Warps_,
193  typename ThreadsPerWarp_,
194  typename InstructionShape_,
195  int kStages_,
196  int kScalarsPerLds_,
197  int kSkew_ = 0>
203  typedef Scalar_* Pointer;
205  typedef Shape<kStages_,
206  OutputTile_::kD / InstructionShape_::kD,
207  GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD>
216  typedef Warps_ Warps;
218  typedef ThreadsPerWarp_ ThreadsPerWarp;
220  static int const kAccessSize = kScalarsPerLds_;
222  static int const kSkew = kSkew_;
225 
230 
232  typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp /* / kAccessSize*/> Iterations;
238 
240  struct ThreadOffset {
243  // The position of the warp.
244  int const warp = threadIdx.x / (Warps::kW * kWarpSize);
245 
246  // Compute the column offset for each thread
247  int const lane = (threadIdx.x & 0x10) / 8 + (threadIdx.x & 0x01);
248  // The offset.
249  int const offset = (warp * ThreadsPerWarp::kH + lane) * kAccessSize;
250 
251  return make_Coord(0, 0, offset, 0);
252  }
253  };
254 };
255 
257 
258 template <typename Scalar_,
259  typename OutputTile_,
260  typename Warps_,
261  typename ThreadsPerWarp_,
262  int kScalarsPerSts_,
263  int kSkew_ = 0>
268  typedef Scalar_* Pointer;
270  typedef OutputTile_ OutputTile;
272  typedef Warps_ Warps;
274  typedef ThreadsPerWarp_ ThreadsPerWarp;
276  static int const kAccessSize = kScalarsPerSts_;
278  static int const kSkew = kSkew_;
281 
283  static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW;
285  static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize;
287  static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew;
288 
297 
299  struct ThreadOffset {
302  // We issue STS.128 in the epilogue to store the accumulators to shared memory. When we use
303  // STS.128, we have to guarantee that threads in groups of 8 do not have bank conflicts (i.e.,
304  // they write to different banks).
305 
306  // Odd threads go to the second half of shared memory.
307  int const row = threadIdx.x & 0x01;
308 
309  int const warp_id = (threadIdx.x >> 5);
310 
311  int const warp_row = (warp_id % Warps::kW);
312  int const warp_col = (warp_id / Warps::kW);
313 
314  int hi_halfwarp_offset = OutputTile::kW * ((threadIdx.x >> 4) & 1);
315  int lo_halfwarp_offset = (((threadIdx.x >> 1) & 0x7) + warp_row * ThreadsPerWarp::kW);
316 
317  int col = kAccessSize * lo_halfwarp_offset +
318  warp_col * (ThreadsPerWarp::kH / 2) * OutputTile::kW + hi_halfwarp_offset;
319 
320  int offset = row * kScalarsPerRow + col;
321  return make_Coord(0, 0, offset, 0);
322  }
323  };
324 };
325 
327 
328 template <typename Scalar_,
329  typename OutputTile_,
330  typename Warps_,
331  typename ThreadsPerWarp_,
332  int kTileH_,
333  int kScalarsPerLds_,
334  int kSkew_ = 0>
339  typedef Scalar_* Pointer;
341  typedef OutputTile_ OutputTile;
343  typedef Warps_ Warps;
345  typedef ThreadsPerWarp_ ThreadsPerWarp;
347  static int const kAccessSize = kScalarsPerLds_;
349  static int const kSkew = kSkew_;
352 
354  static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW;
356  static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize;
358  static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew;
359 
362 
363  // Compute the number of iterations per warp in the Tile::kH dimension.
364  static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount;
365 
366  // As shown above, the shared memory tile is composed of 2 rows and each row is made of
367  // kScalarsPerRow. A warp is expected to read from the 1st row, then move to the 2nd row and go
368  // back to the 1st row. To model that scheme we define the Iterations shape as Shape<X, 2, ...>.
369  // However, in some cases, we have only 1 iteration per warp. In that case, we must define the
370  // shape as Shape<1, 1, ...>. The following code does that.
371  static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2;
372  // As soon as we know kIterationsH, it is trivial to compute kIterationsD:
374 
376  typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kAccessSize> Iterations;
381 
383  struct ThreadOffset {
386  // Each warp works on a different column.
387  int const h = threadIdx.x / kWarpSize;
388  // Compute the row.
389  int const w = (threadIdx.x & (kWarpSize - 1)) * kAccessSize;
390  int offset = 0;
391  if (Iterations::kH == 1) {
392  int const row = h & 0x1;
393  int const col = h / 2;
394  offset = row * ShapeCount<Tile>::kWc + col * OutputTile::kW * Iterations::kD + w;
395  } else {
396  offset = h * OutputTile::kW * Iterations::kD + w;
397  }
398  return make_Coord(0, 0, offset, 0);
399  }
400  };
401 };
402 
404 
405 } // namespace gemm
406 } // namespace cutlass
static int const kAccessSize
The number of scalars per STS.
Definition: gemm_shared_tile.h:95
+
static CUTLASS_DEVICE int get()
Definition: shape.h:253
+
ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:214
+
ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:145
+
ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
The tile without skew after reshaping.
Definition: gemm_shared_tile.h:212
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:351
+
static int const kScalarsPerThread
The number of scalars per thread.
Definition: gemm_shared_tile.h:354
+
Definition: load_store.h:42
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:242
+
Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:232
+
Definition: convert.h:33
+
static int const kWarps
The number of warps.
Definition: gemm_shared_tile.h:227
+
Definition: gemm_shared_tile.h:129
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:42
+ +
static int const kScalarsPerRow
The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts).
Definition: gemm_shared_tile.h:287
+
T type
Definition: platform.h:369
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:132
+ +
Shape< 1, 1, kScalarsPerThread/kAccessSize > Iterations
The number of iterations needed to store the tile.
Definition: gemm_shared_tile.h:292
+
static int const kAccessSize
The number of scalars per LDS.
Definition: gemm_shared_tile.h:347
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in a warp.
Definition: gemm_shared_tile.h:149
+
Definition: reshape_tile.h:42
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
The strides to compute the base position of the thread.
Definition: gemm_shared_tile.h:48
+
static int const kAccessSize
The number of scalars per STS.
Definition: gemm_shared_tile.h:276
+
Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kAccessSize > Iterations
The number of iterations needed to store the tile.
Definition: gemm_shared_tile.h:376
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:349
+
Warps_ Warps
The number of warps.
Definition: gemm_shared_tile.h:216
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:222
+
Definition: gemm_shared_tile.h:38
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:201
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:383
+
Definition: gemm_shared_tile.h:198
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:156
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:40
+
static GemmOperand::Kind const kOperand
Definition: gemm_shared_tile.h:130
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:280
+
Kind
Definition: load_store.h:40
+
Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
The tile with skew.
Definition: gemm_shared_tile.h:210
+
static int const kAccessSize
The number of scalars per LDS.
Definition: gemm_shared_tile.h:152
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:61
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:93
+
Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:165
+
OutputTile_ OutputTile
The dimension of the output tile.
Definition: gemm_shared_tile.h:270
+
static int const kScalarsPerRow
The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts).
Definition: gemm_shared_tile.h:358
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:203
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:134
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:268
+
static int const kScalarsPerThread
The number of scalars per thread.
Definition: gemm_shared_tile.h:283
+
Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:380
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:301
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:54
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:50
+
static int const kThreadsPerWarp
The number of threads in one dimension of the warp.
Definition: gemm_shared_tile.h:229
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:240
+
Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:104
+
Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
The tile.
Definition: gemm_shared_tile.h:290
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_shared_tile.h:52
+
ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:44
+ +
static int const kIterationsInHPerWarp
Definition: gemm_shared_tile.h:364
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:278
+
ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
The tile without skew after reshaping.
Definition: gemm_shared_tile.h:143
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:66
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:170
+
ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
The tile.
Definition: gemm_shared_tile.h:89
+
Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
The strides to compute the base position of the thread.
Definition: gemm_shared_tile.h:116
+
ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
The tile without skews.
Definition: gemm_shared_tile.h:86
+
static int const kIterationsD
Definition: gemm_shared_tile.h:373
+
static int const kWarps
The number of warps.
Definition: gemm_shared_tile.h:159
+
Definition: matrix_traits.h:43
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in the warps.
Definition: gemm_shared_tile.h:274
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:173
+
Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
The tile without skew.
Definition: gemm_shared_tile.h:139
+
Definition: gemm_shared_tile.h:335
+
Threads_ Threads
The threads.
Definition: gemm_shared_tile.h:91
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
OutputTile_ OutputTile
The dimension of the output tile.
Definition: gemm_shared_tile.h:341
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:82
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:167
+
Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:296
+
static GemmOperand::Kind const kOperand
Definition: gemm_shared_tile.h:199
+
Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
The tile.
Definition: gemm_shared_tile.h:361
+
static int const kThreadsPerWarp
The number of threads in one dimension of the warp.
Definition: gemm_shared_tile.h:161
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:84
+
Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:378
+
Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
The number of iterations needed to load/store the tile.
Definition: gemm_shared_tile.h:100
+
Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
The tile without skew.
Definition: gemm_shared_tile.h:208
+
Threads_ Threads
The threads.
Definition: gemm_shared_tile.h:46
+
Definition: gemm_operand.h:50
+ +
Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:63
+
static int const kThreads
The number of threads.
Definition: gemm_shared_tile.h:356
+
Warps_ Warps
The number of warps.
Definition: gemm_shared_tile.h:147
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:97
+
static MemorySpace::Kind const kMemorySpace
The memory space.
Definition: gemm_shared_tile.h:224
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:70
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:175
+
static int const kD
The depth of the cube.
Definition: shape.h:66
+
Computes the thread offset in (H, W) based on thread ID.
Definition: gemm_shared_tile.h:299
+
Warps_ Warps
The warps in the tile.
Definition: gemm_shared_tile.h:343
+
Tile_ Tile
Definition: reshape_tile.h:43
+
Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:102
+
static int const kIterationsH
Definition: gemm_shared_tile.h:371
+
Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:294
+
Kind
Definition: matrix_traits.h:43
+
static int const kSkew
The skew.
Definition: gemm_shared_tile.h:154
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in the warps.
Definition: gemm_shared_tile.h:345
+
Definition: matrix_traits.h:43
+
Scalar_ * Pointer
The pointer.
Definition: gemm_shared_tile.h:339
+
static int const kThreads
The number of threads.
Definition: gemm_shared_tile.h:285
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:234
+
ThreadsPerWarp_ ThreadsPerWarp
The threads in a warp.
Definition: gemm_shared_tile.h:218
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
The tile with skew.
Definition: gemm_shared_tile.h:141
+
Warps_ Warps
The warps in the tile.
Definition: gemm_shared_tile.h:272
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:108
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: gemm_shared_tile.h:385
+
Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
The strides in each dimension between different loads/stores.
Definition: gemm_shared_tile.h:237
+
Definition: gemm_shared_tile.h:264
+
static int const kAccessSize
The number of scalars per LDS.
Definition: gemm_shared_tile.h:220
+
+ + + + diff --git a/docs/generated-html/gemm__traits_8h.html b/docs/generated-html/gemm__traits_8h.html new file mode 100644 index 0000000000..d782b37802 --- /dev/null +++ b/docs/generated-html/gemm__traits_8h.html @@ -0,0 +1,151 @@ + + + + + + + +Cutlass: gemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
gemm_traits.h File Reference
+
+
+ +

Defines structural properties of complete GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
 The params. More...
 
union  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
 
union  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage
 The storage in shared memory. More...
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
 Assemble the global load streams for A/B. More...
 
struct  cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
 Assemble the shared load stream for A/B. More...
 
struct  cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
 
struct  cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/gemm__traits_8h_source.html b/docs/generated-html/gemm__traits_8h_source.html new file mode 100644 index 0000000000..7ca9f4b8d6 --- /dev/null +++ b/docs/generated-html/gemm__traits_8h_source.html @@ -0,0 +1,252 @@ + + + + + + + +Cutlass: gemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
gemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
36 #include <cutlass/matrix_traits.h>
37 #include <cutlass/reshape_tile.h>
38 #include <cutlass/tile_iterator.h>
39 
40 namespace cutlass {
41 namespace gemm {
42 
44 
45 template <
47  typename ScalarA_,
49  typename ScalarB_,
51  typename ScalarC_,
53  typename ScalarD_,
55  typename OutputTile_,
57  typename MultiplyAdd_,
59  int kScalarsPerLdgA_,
61  int kScalarsPerStsA_,
63  int kScalarsPerLdsA_,
65  int kScalarsPerLdgB_,
67  int kScalarsPerStsB_,
69  int kScalarsPerLdsB_,
71  int kScalarsPerLdgCAndStgD_,
73  int kScalarsPerStsD_,
75  int kScalarsPerLdsD_,
77  int kStages_>
78 
79 struct GemmConfig {
80  //
82  typedef ScalarA_ ScalarA;
84  typedef ScalarB_ ScalarB;
86  typedef ScalarC_ ScalarC;
88  typedef ScalarD_ ScalarD;
89 
91  typedef OutputTile_ OutputTile;
93  typedef MultiplyAdd_ MultiplyAdd;
100 
104  static int const kWarpSize = cutlass::kWarpSize;
107 
109  static int const kScalarsPerLdgA = kScalarsPerLdgA_;
110  static int const kScalarsPerStsA = kScalarsPerStsA_;
111  static int const kScalarsPerLdsA = kScalarsPerLdsA_;
112 
114  static int const kScalarsPerLdgB = kScalarsPerLdgB_;
115  static int const kScalarsPerStsB = kScalarsPerStsB_;
116  static int const kScalarsPerLdsB = kScalarsPerLdsB_;
117 
119  static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_;
120 
122  static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_;
123  static int const kScalarsPerStsD = kScalarsPerStsD_;
124  static int const kScalarsPerLdsD = kScalarsPerLdsD_;
125 
127  static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD;
128  static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD;
129 
131  static int const kStages = kStages_;
132 };
133 
135 
136 template <enum MatrixLayout::Kind, typename GemmConfig_>
138 
140 
141 template <typename GemmConfig_>
142 struct GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
145 
147  typedef typename GemmConfig_::ScalarA Scalar;
149  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
150 
152  typedef GemmGlobalTileTraits<
153  // That's A.
155  // A is column-major.
157  // The pointer is float const.
158  Scalar const,
159  // The tile has size KxM in GEMM's terminology.
161  // The threads are distributed as warps x 32 (the traits may reorganize).
163  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
164  GemmConfig_::kScalarsPerLdgA>
166 
169  // The pointer is float.
171  // The tile has size KxM in GEMM's terminology.
172  Shape<GemmConfig_::kStages,
173  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
174  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
175  // The threads are distributed as warps x 32 (the traits may reorganize).
176  typename GlobalTileTraits::Threads,
177  // The number of scalars per STS (STS.32 or STS.128, etc).
178  GemmConfig_::kScalarsPerStsA>
180 
183  // The pointer is float const.
184  MultiplyAddScalar const,
185  // The output tile size.
186  typename GemmConfig_::OutputTile,
187  // The number of warps.
188  typename GemmConfig_::Warps,
189  // The number of threads per warp.
190  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
191  // The shape of the FMA instruction.
192  typename GemmConfig_::InstructionShape,
193  // The number of stages.
194  GemmConfig_::kStages,
195  // The number of scalars per LDS.
196  GemmConfig_::kScalarsPerLdsA,
197  // The skew.
198  0>
200 };
201 
203 
204 template <typename GemmConfig_>
205 struct GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
208 
210  typedef typename GemmConfig_::ScalarA Scalar;
212  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
213 
215  typedef GemmGlobalTileTraits<
216  // That's A.
218  // A is row-major.
220  // The pointer is float const.
221  Scalar const,
222  // The tile has size MxK in GEMM's terminology.
224  // The threads are distributed as (threads / K) x K (the traits may reorganize).
225  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
226  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
227  GemmConfig_::kScalarsPerLdgA>
229 
231  static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar);
234  // The pointer is float.
236  // The tile has size KxM in GEMM's terminology.
237  Shape<GemmConfig_::kStages,
238  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
239  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
240  // The threads are distributed as (threads / K) x K (the traits may reorganize).
241  typename GlobalTileTraits::Threads,
242  // The number of scalars per STS.
243  GemmConfig_::kScalarsPerStsA,
244  // The skew to avoid bank conflicts added in the tile W dimension.
245  128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA /
246  GlobalTileTraits::Threads::kW * kScalarsIn4B>
248 
251  // The pointer is float const.
252  MultiplyAddScalar const,
253  // The output tile size.
254  typename GemmConfig_::OutputTile,
255  // The number of warps.
256  typename GemmConfig_::Warps,
257  // The number of threads per warp.
258  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
259  // The shape of the FMA instruction.
260  typename GemmConfig_::InstructionShape,
261  // The number of stages.
262  GemmConfig_::kStages,
263  // The number of scalars per LDS.
264  GemmConfig_::kScalarsPerLdsA,
265  // The skew.
266  SharedStoreTileTraits::kSkew>
268 };
269 
271 
272 template <enum MatrixLayout::Kind, typename GemmConfig_>
274 
276 
277 template <typename GemmConfig_>
278 struct GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
281 
283  typedef typename GemmConfig_::ScalarB Scalar;
285  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
286 
288  typedef GemmGlobalTileTraits<
289  // That's B.
291  // B is column-major.
293  // The pointer is float const.
294  Scalar const,
295  // The tile has size MxK in GEMM's terminology.
297  // The threads are distributed as (threads / K) x K (the traits may reorganize).
298  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
299  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
300  GemmConfig_::kScalarsPerLdgB>
302 
304  static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar);
307  // The pointer is float.
309  // The tile has size KxN in GEMM's terminology.
310  Shape<GemmConfig_::kStages,
311  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
312  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
313  // The threads are distributed as (threads / K) x K (the traits may reorganize).
314  typename GlobalTileTraits::Threads,
315  // The number of scalars per STS.
316  GemmConfig_::kScalarsPerStsB,
317  // The skew to avoid bank conflicts added in the tile W dimension.
318  128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB /
319  GlobalTileTraits::Threads::kW * kScalarsIn4B>
321 
324  // The pointer is float const.
325  MultiplyAddScalar const,
326  // The output tile size.
327  typename GemmConfig_::OutputTile,
328  // The number of warps.
329  typename GemmConfig_::Warps,
330  // The number of threads per warp.
331  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
332  // The shape of the FMA instruction.
333  typename GemmConfig_::InstructionShape,
334  // The number of stages.
335  GemmConfig_::kStages,
336  // The number of scalars per LDS.
337  GemmConfig_::kScalarsPerLdsB,
338  // The skew.
339  SharedStoreTileTraits::kSkew>
341 };
342 
344 
345 template <typename GemmConfig_>
346 struct GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
349 
351  typedef typename GemmConfig_::ScalarB Scalar;
353  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
354 
356  typedef GemmGlobalTileTraits<
357  // That's B.
359  // B is row-major.
361  // The pointer is float const.
362  Scalar const,
363  // The tile has size KxN in GEMM's terminology.
365  // The threads are distributed as warps x 32 (the traits may reorganize).
367  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
368  GemmConfig_::kScalarsPerLdgB>
370 
373  // The pointer is float.
375  // The tile has size KxN in GEMM's terminology.
376  Shape<GemmConfig_::kStages,
377  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
378  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
379  // The threads are distributed as warps x 32 (the traits may reorganize).
380  typename GlobalTileTraits::Threads,
381  // The number of scalars per STS (STS.32 or STS.128, etc).
382  GemmConfig_::kScalarsPerStsB>
384 
387  // The pointer is float const.
388  MultiplyAddScalar const,
389  // The output tile size.
390  typename GemmConfig_::OutputTile,
391  // The number of warps.
392  typename GemmConfig_::Warps,
393  // The number of threads per warp.
394  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
395  // The shape of the FMA instruction.
396  typename GemmConfig_::InstructionShape,
397  // The number of stages.
398  GemmConfig_::kStages,
399  // The number of scalars per LDS.
400  GemmConfig_::kScalarsPerLdsB,
401  // The skew.
402  0>
404 };
405 
407 
408 template <
410  typename GemmConfig_,
412  typename GlobalLoadStreamA_,
414  typename GlobalLoadStreamB_,
416  typename SharedLoadStreamA_,
418  typename SharedLoadStreamB_,
420  typename Epilogue_,
422  typename BlockSwizzle_ = IdentityBlockSwizzle,
424  typename Index_ = int,
427 
428 struct GemmTraits {
430  typedef GemmConfig_ GemmConfig;
433 
435  typedef GlobalLoadStreamA_ GlobalLoadStreamA;
437  static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout;
439  typedef typename GlobalLoadStreamA_::Scalar ScalarA;
440 
442  typedef GlobalLoadStreamB_ GlobalLoadStreamB;
444  static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout;
446  typedef typename GlobalLoadStreamB_::Scalar ScalarB;
447 
449  typedef SharedLoadStreamA_ SharedLoadStreamA;
451  typedef SharedLoadStreamB_ SharedLoadStreamB;
452 
454  typedef typename GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA;
455  // Make sure we did not mess up the size of the storage.
456  static_assert(sizeof(SharedStoreStorageA) == sizeof(typename SharedLoadStreamA::SharedStorage),
457  "");
458 
460  typedef typename GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB;
461  // Make sure we did not mess up the size of the storage.
462  static_assert(sizeof(SharedStoreStorageB) == sizeof(typename SharedLoadStreamB::SharedStorage),
463  "");
464 
466  typedef typename GemmConfig::MultiplyAdd MultiplyAdd;
468  typedef Epilogue_ Epilogue;
470  typedef typename Epilogue::ScalarC ScalarC;
471  typedef typename Epilogue::ScalarD ScalarD;
472 
474  typedef BlockSwizzle_ BlockSwizzle;
476  typedef Index_ Index;
478  typedef ClearAccumulators_ ClearAccumulators;
479 
481  struct Params {
483  Index m, n, k;
485  typename GlobalLoadStreamA::Params global_stream_a;
487  typename GlobalLoadStreamB::Params global_stream_b;
489  typename SharedLoadStreamA::Params shared_stream_a;
491  typename SharedLoadStreamB::Params shared_stream_b;
493  typename Epilogue::Params epilogue;
494 
496  template <typename GemmDesc_>
497  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
498  // Set the problem size.
499  this->m = desc.m;
500  this->n = desc.n;
501  this->k = desc.k;
502 
503  // Initialize the iterator for A.
504  int error_code =
505  global_stream_a.initialize(reinterpret_cast<ScalarA const*>(desc.d_a), desc.lda);
506 
507  if (error_code) {
508  return error_code;
509  }
510 
511  // Initialize the iterator for B.
512  error_code = global_stream_b.initialize(reinterpret_cast<ScalarB const*>(desc.d_b), desc.ldb);
513 
514  if (error_code) {
515  return error_code;
516  }
517 
518  // The epilogue.
519  return epilogue.initialize(desc);
520  }
521  };
522 
523  // The storage for A.
524  template <typename GlobalLoadStream_, typename SharedLoadStream_>
526  // The storage needed by the global stream.
527  typename GlobalLoadStream_::SharedStorage global;
528  // The storage needed by the shared stream.
529  typename SharedLoadStream_::SharedStorage shared;
530  };
531 
532  // The storage for the main loop + prologue.
534  // The storage to shuffle the A matrix in shared memory.
536  // The storage to shuffle the B matrix in shared memory.
538  // The storage to clear the accumulators if needed.
540  };
541 
544  // The storage for the main loop.
546  // The storage for the epilogue.
547  typename Epilogue::SharedStorage epilogue;
548  };
549 
553  CUTLASS_DEVICE GlobalLoadStream(Params const& params,
554  SharedStorage& shared_storage,
555  dim3 const& block)
556  : stream_a(params.global_stream_a,
557  shared_storage.main_loop.stream_a.global,
558  cutlass::make_Coord(0, params.k, params.m),
559  cutlass::make_Coord(0, 0, block.x)),
560  stream_b(params.global_stream_b,
561  shared_storage.main_loop.stream_b.global,
562  cutlass::make_Coord(0, params.k, params.n),
563  make_Coord(0, 0, block.y)) {}
564 
566  CUTLASS_DEVICE void copy() {
567  stream_a.copy();
568  stream_b.copy();
569  }
570 
572  CUTLASS_DEVICE void commit() {
573  stream_a.commit();
574  stream_b.commit();
575  }
576 
578  CUTLASS_DEVICE void residue(Index k, bool skip_clear = false) {
579  stream_a.residue(k, skip_clear);
580  stream_b.residue(k, skip_clear);
581  }
582 
587  };
588 
592  CUTLASS_DEVICE SharedLoadStream(Params const& params, SharedStorage& shared_storage) {
593  stream_a.initialize(params.shared_stream_a, shared_storage.main_loop.stream_a.shared);
594  stream_b.initialize(params.shared_stream_b, shared_storage.main_loop.stream_b.shared);
595  }
596 
598  CUTLASS_DEVICE void copy(int step) {
599  stream_a.copy(step, fetched_a[step % 2]);
600  stream_b.copy(step, fetched_b[step % 2]);
601  }
602 
604  CUTLASS_DEVICE void commit(int step) {
605  stream_a.commit(fetched_a[step % 2], transformed_a[step % 2]);
606  stream_b.commit(fetched_b[step % 2], transformed_b[step % 2]);
607  }
608 
610  CUTLASS_DEVICE typename SharedLoadStreamA::Fragment const& fragment_a(int step) const {
611  return transformed_a[step % 2];
612  }
613 
615  CUTLASS_DEVICE typename SharedLoadStreamB::Fragment const& fragment_b(int step) const {
616  return transformed_b[step % 2];
617  }
618 
620  CUTLASS_DEVICE void inc_stage() {
621  stream_a.inc_stage();
622  stream_b.inc_stage();
623  }
624 
628  typename SharedLoadStreamA::FetchedFragment fetched_a[2];
630  typename SharedLoadStreamA::TransformedFragment transformed_a[2];
634  typename SharedLoadStreamB::FetchedFragment fetched_b[2];
636  typename SharedLoadStreamB::TransformedFragment transformed_b[2];
637  };
638 
640  static CUTLASS_DEVICE void shared_load_fence(bool in_loop) {
641  if (SharedLoadStreamA::Iterator::kRequiresLoadFence ||
642  SharedLoadStreamB::Iterator::kRequiresLoadFence) {
643  __syncthreads();
644  }
645  }
646 
648  static CUTLASS_DEVICE void shared_store_fence(bool in_loop) { __syncthreads(); }
649 };
650 
652 
653 template <typename GemmTileTraitsHelperA_, typename GemmTileTraitsHelperB_, typename Index_>
661  typedef TileStoreIterator<typename GemmTileTraitsHelperA_::SharedStoreTileTraits,
662  typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar,
669 
676  typedef TileStoreIterator<typename GemmTileTraitsHelperB_::SharedStoreTileTraits,
677  typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar,
684 
686  typedef TileLoadIterator<typename GemmTileTraitsHelperA_::SharedLoadTileTraits,
687  typename GemmTileTraitsHelperA_::Scalar,
694  typedef TileLoadIterator<typename GemmTileTraitsHelperB_::SharedLoadTileTraits,
695  typename GemmTileTraitsHelperB_::Scalar,
701 };
702 
704 
705 template <
707  MatrixLayout::Kind kLayoutA_,
709  MatrixLayout::Kind kLayoutB_,
711  typename GemmConfig_,
713  typename Epilogue_,
715  typename Index_ = int,
716  // The configuration for the A matrix.
717  typename GemmTileTraitsHelperA_ = GemmTileTraitsHelperA<kLayoutA_, GemmConfig_>,
718  // The configuration for the B matrix.
719  typename GemmTileTraitsHelperB_ = GemmTileTraitsHelperB<kLayoutB_, GemmConfig_>,
720  // The helper class to create the streams and iterators.
721  typename Helper_ =
724  // The config.
725  GemmConfig_,
726  // The stream to load A from global memory to shared memory.
727  typename Helper_::GlobalLoadStreamA,
728  // The stream to load B from global memory to shared memory.
729  typename Helper_::GlobalLoadStreamB,
730  // The stream to load A from shared memory.
731  typename Helper_::SharedLoadStreamA,
732  // The stream to load B from shared memory.
733  typename Helper_::SharedLoadStreamB,
734  // The epilogue.
735  Epilogue_,
736  // The block swizzle to reorganize the grid.
737  IdentityBlockSwizzle,
738  // The index.
739  Index_,
740  // The tool used to clear accumulators.
741  ClearAccumulators<typename GemmConfig_::Accumulators::Element> > {
742 };
743 
745 
746 } // namespace gemm
747 } // namespace cutlass
Index n
Definition: gemm_traits.h:483
+
static int const kWarpSize
The default warp size (32 threads per warp).
Definition: gemm_traits.h:104
+
Epilogue::SharedStorage epilogue
Definition: gemm_traits.h:547
+
static int const kScalarsPerStsA
Definition: gemm_traits.h:110
+
GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^N.
Definition: gemm_traits.h:340
+
ScalarA_ ScalarA
The scalar for A.
Definition: gemm_traits.h:82
+
GlobalLoadStreamA_ GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: gemm_traits.h:435
+
GlobalStoreIteratorD::Scalar ScalarD
The scalar for D.
Definition: gemm_epilogue.h:98
+
MultiplyAdd_ MultiplyAdd
The functor to do D = A*B + C.
Definition: gemm_traits.h:93
+
static int const kAccumulatorsPerLdsA
The number of accumulators that are going to be fed from one LDS A/B.
Definition: gemm_traits.h:127
+
Definition: load_store.h:42
+
static int const kScalarsPerLdsA
Definition: gemm_traits.h:111
+
SharedLoadStreamA_ SharedLoadStreamA
The iterator for A to load from shared memory.
Definition: gemm_traits.h:449
+
MultiplyAdd::InstructionShape InstructionShape
The shape of the instruction.
Definition: gemm_traits.h:95
+
Definition: convert.h:33
+
SharedLoadStreamA::Params shared_stream_a
The params for the A stream from shared memory.
Definition: gemm_traits.h:489
+
Definition: gemm_shared_tile.h:129
+
GlobalLoadStreamB_ GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: gemm_traits.h:442
+ +
CUTLASS_DEVICE void inc_stage()
Increment the stage.
Definition: gemm_traits.h:620
+
TileStoreIterator< typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: gemm_traits.h:665
+
static int const kScalarsPerLdsB
Definition: gemm_traits.h:116
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
Epilogue::ScalarD ScalarD
Definition: gemm_traits.h:471
+
The storage in shared memory.
Definition: gemm_traits.h:543
+
SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
The stream to load B from shared memory.
Definition: gemm_traits.h:700
+
Index k
Definition: gemm_traits.h:483
+ +
Definition: gemm_global_tile.h:70
+
SharedLoadStreamA::FetchedFragment fetched_a[2]
The fragments to fetch A.
Definition: gemm_traits.h:628
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
GemmConfig_::ScalarB Scalar
The input scalar.
Definition: gemm_traits.h:283
+
GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for B^T.
Definition: gemm_traits.h:383
+
SharedLoadStreamB_ SharedLoadStreamB
The iterator for B to load from shared memory.
Definition: gemm_traits.h:451
+
static int const kScalarsPerStgD
The number of scalars per STS/LDS/STG for D.
Definition: gemm_traits.h:122
+
CUTLASS_DEVICE void copy(int step)
Trigger the copies from shared memory to registers.
Definition: gemm_traits.h:598
+
GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^N.
Definition: gemm_traits.h:301
+
Definition: convert.h:69
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
SharedLoadStreamA stream_a
The stream for A.
Definition: gemm_traits.h:626
+
SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
The stream to load A from shared memory.
Definition: gemm_traits.h:692
+
Definition: gemm_shared_tile.h:38
+
ScalarC_ ScalarC
The scalar for C.
Definition: gemm_traits.h:86
+
CUTLASS_DEVICE void copy()
Trigger the copies from shared memory to registers.
Definition: gemm_traits.h:566
+
GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^N.
Definition: gemm_traits.h:199
+
Epilogue_ Epilogue
The epilogue.
Definition: gemm_traits.h:468
+
GlobalLoadStreamA_::Scalar ScalarA
The scalar for A.
Definition: gemm_traits.h:439
+
Definition: tile_iterator.h:62
+
GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^N.
Definition: gemm_traits.h:165
+
ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
The number of warps.
Definition: gemm_traits.h:102
+
GemmConfig_::ScalarA Scalar
The input scalar.
Definition: gemm_traits.h:147
+
Definition: gemm_shared_tile.h:198
+
GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
The shared storage for B.
Definition: gemm_traits.h:457
+
Definition: gemm_global_tile.h:159
+
Epilogue::ScalarC ScalarC
The scalars in the epilogue.
Definition: gemm_traits.h:470
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: gemm_traits.h:683
+
SharedLoadStreamB stream_b
The stream for B.
Definition: gemm_traits.h:632
+
Assemble the shared load stream for A/B.
Definition: gemm_traits.h:590
+
GlobalLoadStreamB stream_b
The stream for B.
Definition: gemm_traits.h:586
+
GemmConfig::MultiplyAdd MultiplyAdd
The multiply-add functor.
Definition: gemm_traits.h:463
+
static CUTLASS_DEVICE void shared_load_fence(bool in_loop)
The memory fence for shared loads.
Definition: gemm_traits.h:640
+
GemmConfig_ GemmConfig
The configuration.
Definition: gemm_traits.h:430
+
Definition: gemm_global_stream.h:161
+
SharedLoadStreamB::TransformedFragment transformed_b[2]
The fragments to transform B.
Definition: gemm_traits.h:636
+
Definition: gemm_traits.h:273
+
GlobalLoadStreamA stream_a
The stream for A.
Definition: gemm_traits.h:584
+
GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^T.
Definition: gemm_traits.h:267
+
Definition: clear_accumulators.h:38
+
StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB > stream_b
Definition: gemm_traits.h:537
+
The params.
Definition: gemm_traits.h:481
+
static int const kScalarsPerLdgA
The number of scalars per LDG/STS/LDS for A.
Definition: gemm_traits.h:109
+
CUTLASS_DEVICE SharedLoadStreamB::Fragment const & fragment_b(int step) const
The fragment B.
Definition: gemm_traits.h:615
+
Copy< typename GlobalLoadIteratorB::Fragment > GlobalTransformerB
The data converter for B before storing to shared memory.
Definition: gemm_traits.h:674
+
GemmConfig_::ScalarB Scalar
The input scalar.
Definition: gemm_traits.h:351
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The global iterator to load B from global memory.
Definition: gemm_traits.h:672
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+ +
Definition: matrix_traits.h:36
+
CUTLASS_DEVICE void residue(Index k, bool skip_clear=false)
Execute the residue code.
Definition: gemm_traits.h:578
+
MultiplyAdd::Accumulators Accumulators
The accumulators.
Definition: gemm_traits.h:99
+
ClearAccumulators_ ClearAccumulators
Clear the accumulators.
Definition: gemm_traits.h:478
+
Definition: gemm_shared_stream.h:44
+
GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^T.
Definition: gemm_traits.h:228
+
Defines a type for restructuring a tile.
+
Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
+
Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
Definition: shape.h:126
+
static int const kScalarsPerStsB
Definition: gemm_traits.h:115
+
Defines abstractions for efficiently clearing accumulator tiles.
+
Definition: gemm_traits.h:79
+
Assemble the global load streams for A/B.
Definition: gemm_traits.h:551
+
static int const kScalarsPerStsD
Definition: gemm_traits.h:123
+
static CUTLASS_DEVICE void shared_store_fence(bool in_loop)
The memory fence for shared stores.
Definition: gemm_traits.h:648
+
GemmConfig_::ScalarA Scalar
The input scalar.
Definition: gemm_traits.h:210
+
Definition: gemm_traits.h:137
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Initialize the parameters.
Definition: gemm_traits.h:497
+
GlobalLoadStream_::SharedStorage global
Definition: gemm_traits.h:527
+
Definition: matrix_traits.h:43
+
Definition: identity_block_swizzle.h:37
+
GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for A^N.
Definition: gemm_traits.h:179
+
ScalarB_ ScalarB
The scalar for B.
Definition: gemm_traits.h:84
+
GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:353
+
GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:285
+
GlobalLoadStreamB_::Scalar ScalarB
The scalar for B.
Definition: gemm_traits.h:446
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
The shared storage for A.
Definition: gemm_traits.h:454
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: gemm_traits.h:668
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Definition: gemm_traits.h:428
+
MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
The number of accumulators per warp.
Definition: gemm_traits.h:97
+
SharedLoadStreamA::TransformedFragment transformed_a[2]
The fragments to transform A.
Definition: gemm_traits.h:630
+
SharedLoadStream_::SharedStorage shared
Definition: gemm_traits.h:529
+
GlobalLoadStreamB::Params global_stream_b
The params for the B stream.
Definition: gemm_traits.h:487
+
SharedLoadStreamB::FetchedFragment fetched_b[2]
The fragments to fetch B.
Definition: gemm_traits.h:634
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
static int const kScalarsPerLdgC
The number of scalars per LDG for C.
Definition: gemm_traits.h:119
+
ScalarD_ ScalarD
The scalar for D.
Definition: gemm_traits.h:88
+
static int const kThreads
The number of threads.
Definition: gemm_traits.h:106
+
Defines functors for mapping blockIdx to partitions of the GEMM computation.
+
Index m
The dimensions of the GEMM.
Definition: gemm_traits.h:483
+
BlockSwizzle_ BlockSwizzle
The block swizzle to reorganize the grid.
Definition: gemm_traits.h:474
+
TileLoadIterator< typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: gemm_traits.h:690
+
Definition: matrix_traits.h:36
+
TileLoadIterator< typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: gemm_traits.h:698
+
CUTLASS_DEVICE SharedLoadStream(Params const &params, SharedStorage &shared_storage)
Ctor.
Definition: gemm_traits.h:592
+
CUTLASS_DEVICE GlobalLoadStream(Params const &params, SharedStorage &shared_storage, dim3 const &block)
Ctor.
Definition: gemm_traits.h:553
+
GlobalLoadIteratorC::Scalar ScalarC
The scalar for C.
Definition: gemm_epilogue.h:96
+
Index_ Index
The index.
Definition: gemm_traits.h:476
+
GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:149
+
TileStoreIterator< typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: gemm_traits.h:680
+
Epilogue::Params epilogue
The params for the epilogue.
Definition: gemm_traits.h:493
+
Kind
Definition: matrix_traits.h:36
+
GlobalLoadStreamA::Params global_stream_a
The params for the A stream.
Definition: gemm_traits.h:485
+
The shared storage.
Definition: clear_accumulators.h:40
+
CUTLASS_DEVICE void commit(int step)
Commit the data.
Definition: gemm_traits.h:604
+
static int const kScalarsPerLdsD
Definition: gemm_traits.h:124
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
MainLoopSharedStorage main_loop
Definition: gemm_traits.h:545
+
static MatrixLayout::Kind const kLayoutA
The layout of A.
Definition: gemm_traits.h:437
+
OutputTile_ OutputTile
The tile.
Definition: gemm_traits.h:91
+
static int const kScalarsPerLdgB
The number of scalars per LDG/STS/LDS for B.
Definition: gemm_traits.h:114
+
Definition: matrix_traits.h:43
+
Definition: gemm_traits.h:654
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The global iterator to load A from global memory.
Definition: gemm_traits.h:657
+
GemmConfig::OutputTile OutputTile
The output tile.
Definition: gemm_traits.h:432
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Copy< typename GlobalLoadIteratorA::Fragment > GlobalTransformerA
The data converter for A before storing to shared memory.
Definition: gemm_traits.h:659
+
CUTLASS_DEVICE void commit()
Commit the data.
Definition: gemm_traits.h:572
+
GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^T.
Definition: gemm_traits.h:403
+
ClearAccumulators::SharedStorage clear
Definition: gemm_traits.h:539
+
StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA > stream_a
Definition: gemm_traits.h:535
+
GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^T.
Definition: gemm_traits.h:369
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Computes derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Defines conversion operations among Fragments of different base type.
+
SharedLoadStreamB::Params shared_stream_b
The params for the B stream from shared memory.
Definition: gemm_traits.h:491
+
Definition: gemm_traits.h:723
+
CUTLASS_DEVICE SharedLoadStreamA::Fragment const & fragment_a(int step) const
The fragment A.
Definition: gemm_traits.h:610
+
static MatrixLayout::Kind const kLayoutB
The layout of B.
Definition: gemm_traits.h:444
+
static int const kAccumulatorsPerLdsB
Definition: gemm_traits.h:128
+
static int const kStages
The number of stages in shared memory to implement double, triple, more-buffering.
Definition: gemm_traits.h:131
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: thread_multiply_add.h:51
+
GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
The scalar stored in shared memory.
Definition: gemm_traits.h:212
+
+ + + + diff --git a/docs/generated-html/globals.html b/docs/generated-html/globals.html new file mode 100644 index 0000000000..ddd387b2f8 --- /dev/null +++ b/docs/generated-html/globals.html @@ -0,0 +1,147 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
Here is a list of all file members with links to the files they belong to:
+
+ + + + diff --git a/docs/generated-html/globals_defs.html b/docs/generated-html/globals_defs.html new file mode 100644 index 0000000000..d1df12cbe1 --- /dev/null +++ b/docs/generated-html/globals_defs.html @@ -0,0 +1,144 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+ + + + diff --git a/docs/generated-html/globals_func.html b/docs/generated-html/globals_func.html new file mode 100644 index 0000000000..6f910b764b --- /dev/null +++ b/docs/generated-html/globals_func.html @@ -0,0 +1,84 @@ + + + + + + + +Cutlass: File Members + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+ + + + diff --git a/docs/generated-html/group__fragment__concept.html b/docs/generated-html/group__fragment__concept.html new file mode 100644 index 0000000000..85e3572764 --- /dev/null +++ b/docs/generated-html/group__fragment__concept.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Fragment Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Fragment Concept
+
+
+ + + + + +

+Classes

struct  cutlass::Fragment< Element_, kElements_, kAlignment_ >
 A template defining Fragment Concept. More...
 
+

Detailed Description

+

Fragment Concept is a statically sized array for storing parts of tiles held by individual CUDA threads.

+
fragment_concept
Types satisfying Fragment Concept define the following members
    +
  • Element - type of each access held within the fragment
  • +
  • kElements - number of elements stored by the fragment
  • +
  • clear() - overwrites the fragment storage with zeros
  • +
  • Element & operator[](int i) - by-reference access of the ith element
  • +
  • Element const & operator[](int i) const - const by-reference access of the ith element
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__fragment__iterator__concept.html b/docs/generated-html/group__fragment__iterator__concept.html new file mode 100644 index 0000000000..dc89e72e5a --- /dev/null +++ b/docs/generated-html/group__fragment__iterator__concept.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Fragment Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Fragment Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
 A template defining Fragment Iterator Concept. More...
 
+

Detailed Description

+

Fragment Iterator Concept provides structured access to the elements within a fragment with an optional bitcast to the desired access type

+
fragment_iterator_concept
Types satisfying Fragment Iterator Concept define the following members
    +
  • AccessType& operator[](int i) - provides access to the ith element of the fragment
  • +
  • AccessType& at(int d, int h, int w, int c) - applies Layout Concept to fragment and provides access to element at (d, h, w, c)
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__layout__concept.html b/docs/generated-html/group__layout__concept.html new file mode 100644 index 0000000000..3fe8532c84 --- /dev/null +++ b/docs/generated-html/group__layout__concept.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Layout Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Layout Concept
+
+
+ + + + + +

+Classes

struct  cutlass::Shape< kD_, kH_, kW_, kC_ >
 A Shape implementing Layout Concept describing the dimensions of a cube. More...
 
+

Detailed Description

+
Implementations of layout_concept are used to describe a cube with DxHxW elements and C
scalars per element. A HxW slice of a cube is called an image and a cube consists of D images.
+
Notations
Let Layout be an implementation of the Layout Concept.
+
Valid Expressions
    +
  • Layout::D specifies the depth of a cube
  • +
  • Layout::H specifies the height of a cube
  • +
  • Layout::W specifies the width of a cube
  • +
  • Layout::C specifies the number of channels of each element in a cube
  • +
  • Layout::W_c specifies the number of scalars of each row in one image of a cube.
  • +
  • Layout::H_w specifies the number of elements in an image slice.
  • +
  • Layout::H_w_c specifies the number of scalars in an image slice.
  • +
  • Layout::D_h_w specifies the number of elements in a cube.
  • +
  • Layout::D_h_w_c specifies the number of scalars in a cube.
  • +
  • Layout::Strides is a Layout Concept specifying the strides.
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__iterator__concept.html b/docs/generated-html/group__predicate__iterator__concept.html new file mode 100644 index 0000000000..95c1ef2efe --- /dev/null +++ b/docs/generated-html/group__predicate__iterator__concept.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Predicate Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Predicate Iterator Concept
+
+
+ + + + + + + + +

+Classes

class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator
 A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
 
class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator
 An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
 
+

Detailed Description

+

Implementations of Predicate Iterator Concept enable accessing and traversing elements of a bit vector.

+
Const Predicate Iterator
A const Predicate Iterator Concept satisfies the following expressions
    +
  • ++it increments the iterator to the next predicate
  • +
  • *it returns the value of the currently pointed-to predicate
  • +
+
+
Mutable Predicate Iterator
A Predicate Iterator Concept that is non-const also satisfies the following expressions
    +
  • it.set(bool value) sets the value of the currently pointed-to predicate
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__tile__adapter.html b/docs/generated-html/group__predicate__tile__adapter.html new file mode 100644 index 0000000000..a4b809922e --- /dev/null +++ b/docs/generated-html/group__predicate__tile__adapter.html @@ -0,0 +1,88 @@ + + + + + + + +Cutlass: Predicate Tile Adapter Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Predicate Tile Adapter Concept
+
+
+

Implementations of Predicate Tile Adapter Concept provide a mapping between the elements of a Tile Traits Concept and a Predicate Vector Concept.

+
Predicate Tile Adapter
A Predicate Tile Adapter Concept satisfies the following expressions
    +
  • at(int d, int h, int w, int c) - returns the value of a predicate corresponding to the access (d, h, w, c) within the tile.
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__predicate__vector__concept.html b/docs/generated-html/group__predicate__vector__concept.html new file mode 100644 index 0000000000..5147870e64 --- /dev/null +++ b/docs/generated-html/group__predicate__vector__concept.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Predicate Vector Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Predicate Vector Concept
+
+
+ + + + + +

+Classes

struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
 Statically sized array of bits implementing Predicate Vector Concept. More...
 
+

Detailed Description

+

Implementations of Predicate Vector Concept contain an ordered set of boolean predicates which may be used as conditionals in other device-side operations. Both random access and iterators offering sequential access are provided.

+
Predicate Vector
A Predicate Vector Concept satisfies the following expressions
    +
  • at(int idx) - returns the value of the indexed predicate
  • +
  • set(int idx, bool value) - sets the value of the indexed predicate
  • +
  • begin() - returns a Predicate Iterator Concept pointing to the first predicate
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__load__iterator__concept.html b/docs/generated-html/group__tile__load__iterator__concept.html new file mode 100644 index 0000000000..2bc4b4e346 --- /dev/null +++ b/docs/generated-html/group__tile__load__iterator__concept.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Tile Load Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Load Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
 An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
 
+

Detailed Description

+

Tile Load Iterator Concept enables loading a tile from addressable memory into a fragment

+
Tile Load Iterator Concept
Types satisfying Tile Load Iterator Concept define the following members
    +
  • PredicateVector - a Predicate Vector Concept with sufficient predicate storage for each access implied by the tile traits
  • +
  • Fragment - the destination fragment type satisfying Fragment Concept
  • +
  • initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate vector according to externally specified bounds
  • +
  • load_post_increment(fragment, pred_it) - a method that loads a fragment and increments the iterator to the next tile, guarded by a Predicate Iterator Concept
  • +
  • load_post_increment(fragment) - a method that loads a fragment and increments the iterator to the next tile
  • +
  • load(fragment, pred_it) - a const method that loads a fragment, guarded by a Predicate Iterator Concept
  • +
  • load(fragment) - a method that loads a fragment
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__store__iterator__concept.html b/docs/generated-html/group__tile__store__iterator__concept.html new file mode 100644 index 0000000000..bde5405317 --- /dev/null +++ b/docs/generated-html/group__tile__store__iterator__concept.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Tile Store Iterator Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Store Iterator Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
 An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
 
+

Detailed Description

+

Tile Store Iterator Concept enables storing a tile to addressable memory

+
Tile Store Iterator Concept
Types satisfying Tile Store Iterator Concept define the following members
    +
  • PredicateVector - a Predicate Vector Concept with sufficient predicate storage for each access implied by the tile traits
  • +
  • Fragment - the source fragment type satisfying Fragment Concept
  • +
  • initialize_predicates(pred_it, bounds, block_offset) - function initializing a predicate vector according to externally specified bounds
  • +
  • store_post_increment(fragment, pred_it) - a method that stores a fragment and increments the iterator to the next tile, guarded by a Predicate Iterator Concept
  • +
  • store_post_increment(fragment) - a method that stores a fragment and increments the iterator to the next tile
  • +
  • store(fragment, pred_it) - a const method that stores a fragment, guarded by a Predicate Iterator Concept
  • +
  • store(fragment) - a method that stores a fragment
  • +
+
+
+ + + + diff --git a/docs/generated-html/group__tile__traits__concept.html b/docs/generated-html/group__tile__traits__concept.html new file mode 100644 index 0000000000..16e4bd8ae6 --- /dev/null +++ b/docs/generated-html/group__tile__traits__concept.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Tile Traits Concept + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+ +
+
Tile Traits Concept
+
+
+ + + + + +

+Classes

struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >
 A template defining Tile Traits Concept. More...
 
+

Detailed Description

+

Tile Traits Concept is a type defining the shape of a tile and the distribution of accesses by individual entities, either threads or other.

+
Tile Traits Concept
Types satisfying Tile Traits Concept define the following members
    +
  • Tile - a type satisfying Layout Concept describing the dimensions of the tile
  • +
  • Delta - a type satisfying Layout Concept describing the increments between accesses along each dimension
  • +
  • Iterations - a type satisfying Layout Concept describing the number of accesses along each dimension
  • +
  • Offset - the type of a functor computing the offset of each participating entity as a Coord<4>.
  • +
+
+
+ + + + diff --git a/docs/generated-html/hgemm__global__tile_8h.html b/docs/generated-html/hgemm__global__tile_8h.html new file mode 100644 index 0000000000..b62b8c143b --- /dev/null +++ b/docs/generated-html/hgemm__global__tile_8h.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: hgemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_global_tile.h File Reference
+
+
+ +

Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the thread block-level tile into 2D subtiles loaded by the threads and facilitate memory accesses larger than 16 bits. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__global__tile_8h_source.html b/docs/generated-html/hgemm__global__tile_8h_source.html new file mode 100644 index 0000000000..bdd647d1a7 --- /dev/null +++ b/docs/generated-html/hgemm__global__tile_8h_source.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: hgemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/coord.h>
34 #include <cutlass/matrix_traits.h>
35 #include <cutlass/reshape_tile.h>
36 
37 namespace cutlass {
38 namespace gemm {
39 
41 
42 template <GemmOperand::Kind kOperand_,
43  MatrixLayout::Kind kLayout_,
44  typename Scalar_,
45  typename Tile_,
46  typename Threads_,
47  int kAccessSize_>
49  // Which GEMM operand?
50  kOperand_,
51  // The layout.
52  kLayout_,
53  // The scalar.
54  Scalar_,
55  // The tile.
56  Tile_,
57  // The threads.
58  Threads_,
59  // The number of scalars per LDG/STG.
60  kAccessSize_> {
64  typedef typename Base::Threads Threads;
70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 2,
71  2,
72  Base::Tile::kW / Base::Threads::kW,
73  Base::Tile::kC / Base::kAccessSize>
76  struct ThreadOffset {
78  Coord<4> operator()() const {
79  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
80  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
81 
82  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
83  }
84  };
85 };
86 
88 
89 } // namespace gemm
90 } // namespace cutlass
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Definition: gemm_global_tile.h:70
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< Base::Tile::kH/Base::Threads::kH/2, 2, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: hgemm_global_tile.h:74
+
Base::Threads Threads
The threads.
Definition: hgemm_global_tile.h:64
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: hgemm_global_tile.h:78
+
Shape< Base::Threads::kH *2, 1, Base::Threads::kW, Base::kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: hgemm_global_tile.h:68
+
Shape< 1, 2, Base::Tile::kC > ThreadsDelta
The threads strides.
Definition: hgemm_global_tile.h:66
+
Defines a type for restructuring a tile.
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: hgemm_global_tile.h:62
+
Definition: hgemm_global_tile.h:48
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+ +
static int const kW
The width of the cube.
Definition: shape.h:70
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Computes the thread offset in (H, W) based on thread ID.
Definition: hgemm_global_tile.h:76
+
Kind
Definition: matrix_traits.h:43
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
+ + + + diff --git a/docs/generated-html/hgemm__multiply__add_8h.html b/docs/generated-html/hgemm__multiply__add_8h.html new file mode 100644 index 0000000000..3c6c609e88 --- /dev/null +++ b/docs/generated-html/hgemm__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: hgemm_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_multiply_add.h File Reference
+
+
+ +

Specialization implementing multiply-add operation on half-precision floating point fragments. +More...

+ +

Go to the source code of this file.

+ + + + + +

+Classes

struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
 Template performing matrix multiply-add operation within a thread. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__multiply__add_8h_source.html b/docs/generated-html/hgemm__multiply__add_8h_source.html new file mode 100644 index 0000000000..73ef904095 --- /dev/null +++ b/docs/generated-html/hgemm__multiply__add_8h_source.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: hgemm_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half> {
46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
48  typedef ThreadsPerWarp_ ThreadsPerWarp;
52  typedef half ScalarA;
56  typedef half ScalarB;
60  typedef half ScalarC;
63 
65  static_assert(AccumulatorsPerThread::kH % 2 == 0, "Invalid size");
66  static_assert(AccumulatorsPerThread::kW % 2 == 0, "Invalid size");
67 
69  CUTLASS_DEVICE ThreadMultiplyAdd() {}
70 
72  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
73  FragmentB const& b,
74  Accumulators const& c,
75  Accumulators& d) {
76 #if defined(__CUDACC__) && __CUDA_ARCH__ >= 530
77  // The inputs.
78  __half2 const* a_half2 = reinterpret_cast<__half2 const*>(&a[0]);
79  __half2 const* b_half2 = reinterpret_cast<__half2 const*>(&b[0]);
80  __half2 const* c_half2 = reinterpret_cast<__half2 const*>(&c[0]);
81 
82  // The output.
83  __half2* d_half2 = reinterpret_cast<__half2*>(&d[0]);
84 
85  for (int j = 0; j < AccumulatorsPerThread::kH / 2; ++j) {
86  for (int i = 0; i < AccumulatorsPerThread::kW / 2; ++i) {
87  // The offsets in the output fragment.
88  int const k0 = (2 * j + 0) * (AccumulatorsPerThread::kW / 2) + i;
89  int const k1 = (2 * j + 1) * (AccumulatorsPerThread::kW / 2) + i;
90 
91  // Compute the product a[i] * b[j].H0_H0.
92  d_half2[k0] = __hfma2(a_half2[i], __low2half2(b_half2[j]), c_half2[k0]);
93  // Compute the product a[i] * b[j].H1_H1.
94  d_half2[k1] = __hfma2(a_half2[i], __high2half2(b_half2[j]), c_half2[k1]);
95  }
96  }
97 #endif
98  }
99 };
100 
102 
103 } // namespace gemm
104 } // namespace cutlass
+
Definition: convert.h:33
+
Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
The accumulators.
Definition: hgemm_multiply_add.h:62
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: hgemm_multiply_add.h:50
+
half ScalarC
The type for C and D.
Definition: hgemm_multiply_add.h:60
+
CUTLASS_DEVICE ThreadMultiplyAdd()
Make sure there's an even number of elements in both dimensions.
Definition: hgemm_multiply_add.h:69
+
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Template implementing matrix multiply-add operations on fragments.
+
Shape< 1, 1, 2, 1 > InstructionShape
The shape of the instruction.
Definition: hgemm_multiply_add.h:44
+ +
ThreadsPerWarp_ ThreadsPerWarp
The number of threads per warp.
Definition: hgemm_multiply_add.h:48
+
AccumulatorsPerThread_ AccumulatorsPerThread
The number of accumulators per thread.
Definition: hgemm_multiply_add.h:46
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
Multiply : d = a*b + c.
Definition: hgemm_multiply_add.h:72
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...
+
Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
The fragment for A.
Definition: hgemm_multiply_add.h:54
+
Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
The fragment for B.
Definition: hgemm_multiply_add.h:58
+
+ + + + diff --git a/docs/generated-html/hgemm__swizzle_8h.html b/docs/generated-html/hgemm__swizzle_8h.html new file mode 100644 index 0000000000..aef7ac75ee --- /dev/null +++ b/docs/generated-html/hgemm__swizzle_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: hgemm_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_swizzle.h File Reference
+
+
+ +

Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for multiplicands. +More...

+
#include <cuda_fp16.h>
+#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::HgemmSwizzle< GlobalIterator_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__swizzle_8h_source.html b/docs/generated-html/hgemm__swizzle_8h_source.html new file mode 100644 index 0000000000..bb76b510c8 --- /dev/null +++ b/docs/generated-html/hgemm__swizzle_8h_source.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: hgemm_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cuda_fp16.h>
32 #include <cutlass/fragment.h>
33 
34 namespace cutlass {
35 namespace gemm {
36 
38 
39 template <typename GlobalIterator_>
40 struct HgemmSwizzle {
42  typedef GlobalIterator_ GlobalIterator;
44  typedef typename GlobalIterator::Fragment Fragment;
46  typedef typename GlobalIterator::FragmentShape FragmentShape;
47 
52 
55 
57  static_assert(FragmentShape::kH == 2 && ShapeCount<FragmentShape>::kWc == 2, "Not multiple of 2");
58 
60  CUTLASS_DEVICE HgemmSwizzle() {}
61 
63  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
64  // Expose src/dst as int arrays.
65  int const* src_int = reinterpret_cast<int const*>(&src[0]);
66  int* dst_int = reinterpret_cast<int*>(&dst[0]);
67 
68  // Transpose the data.
69  for (int d = 0; d < FragmentShape::kD; ++d) {
70  // The indices to read two consecutive "rows".
71  int const i0 = 2 * d + 0;
72  int const i1 = 2 * d + 1;
73 
74  int a0 = src_int[i0];
75  int a1 = src_int[i1];
76 
77  int b0, b1;
78  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(a0), "r"(a1));
79  asm volatile("prmt.b32 %0, %1, %2, 0x7632;" : "=r"(b1) : "r"(a0), "r"(a1));
80 
81  // The indices to store with "strides".
82  int const j0 = 0 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
83  int const j1 = 1 * (ShapeCount<FragmentShape>::kDhw / 2) + d;
84 
85  dst_int[j0] = b0;
86  dst_int[j1] = b1;
87  }
88  }
89 };
90 
92 
93 } // namespace gemm
94 } // namespace cutlass
GlobalIterator_ GlobalIterator
The global iterator.
Definition: hgemm_swizzle.h:42
+
Definition: convert.h:33
+
std::is_same (false specialization)
Definition: platform.h:412
+
CUTLASS_DEVICE HgemmSwizzle()
The src/dst must be half fragments.
Definition: hgemm_swizzle.h:60
+
CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
Transform a fragment.
Definition: hgemm_swizzle.h:63
+
Fragment InputFragment
The input fragment.
Definition: hgemm_swizzle.h:49
+
Fragment OutputFragment
The output fragment.
Definition: hgemm_swizzle.h:51
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
GlobalIterator::Fragment Fragment
The source fragment.
Definition: hgemm_swizzle.h:44
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...
+
GlobalIterator::FragmentShape FragmentShape
The shape of the source fragment.
Definition: hgemm_swizzle.h:46
+
Compute derived counts of a Layout Concept based class.
Definition: shape.h:79
+
Definition: hgemm_swizzle.h:40
+
+ + + + diff --git a/docs/generated-html/hgemm__traits_8h.html b/docs/generated-html/hgemm__traits_8h.html new file mode 100644 index 0000000000..283ceb7504 --- /dev/null +++ b/docs/generated-html/hgemm__traits_8h.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: hgemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
hgemm_traits.h File Reference
+
+
+ +

Defines structural properties of half-precision GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >
 
struct  cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
 
struct  cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/hgemm__traits_8h_source.html b/docs/generated-html/hgemm__traits_8h_source.html new file mode 100644 index 0000000000..0d12493ec9 --- /dev/null +++ b/docs/generated-html/hgemm__traits_8h_source.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: hgemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
hgemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/convert.h>
31 #include <cutlass/reshape_tile.h>
32 
33 #include <cutlass/gemm/gemm.h>
42 
43 namespace cutlass {
44 namespace gemm {
45 
47 
48 template <
50  typename OutputTile_,
52  typename AccumulatorsPerThread_,
54  int kScalarsPerLdgA_ = 2,
56  int kScalarsPerLdgB_ = 2>
58  : public GemmConfig<
60  half,
62  half,
64  half,
66  half,
68  OutputTile_,
70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, half, half, half>,
72  kScalarsPerLdgA_,
74  kScalarsPerLdgA_,
76  8,
78  kScalarsPerLdgB_,
80  kScalarsPerLdgB_,
82  8,
84  2,
86  8,
88  2,
90  2> {};
91 
93 
94 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
96 
97 template <typename Iterator_>
98 struct HgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
100 };
101 
102 template <typename Iterator_>
103 struct HgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
105 };
106 
108 
109 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
111 
112 template <typename Iterator_>
113 struct HgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
115 };
116 
117 template <typename Iterator_>
118 struct HgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
120 };
121 
123 
124 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
125 struct HgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
126 
128 
129 template <typename GemmConfig_>
130 struct HgemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_>
131  : public GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
134 
138  // The layout.
140  // The pointer.
141  half const,
142  // The tile has size MxK in GEMM's terminology.
144  // The threads are distributed as (threads / K ) x K (the traits may reorganize).
145  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
146  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
147  GemmConfig_::kScalarsPerLdgA>
149 
152  // The pointer.
153  half,
154  // The tile has size KxM in GEMM's terminology.
155  Shape<GemmConfig_::kStages,
156  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
157  GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>,
158  // The threads are distributed as warps x 32(the traits may reorganize).
159  typename GlobalTileTraits::Threads,
160  // The number of scalars per STS (STS.32 or STS.128, etc).
161  2,
162  // The skew to avoid bank conflicts added in the tile W dimension.
163  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
165 
168  // The pointer.
169  half const,
170  // The output tile size.
171  typename GemmConfig_::OutputTile,
172  // The number of warps.
173  typename GemmConfig_::Warps,
174  // The number of threads per warp.
175  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
176  // The shape of the FMA instruction.
177  typename GemmConfig_::InstructionShape,
178  // The number of stages.
179  GemmConfig_::kStages,
180  // The number of scalars per LDS.
181  8,
182  // The skew.
183  SharedStoreTileTraits::kSkew>
185 };
186 
188 
189 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
190 struct HgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
191 
193 
194 template <typename GemmConfig_>
195 struct HgemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_>
196  : public GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
199 
203  // The layout.
205  // The pointer.
206  half const,
207  // The tile has size KxN in GEMM's terminology.
209  // The threads are distributed as (threads / K) x K (the traits may reorganize).
210  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
211  // The number of scalars per LDG (LDG.32 or LDG.128, etc)
212  GemmConfig_::kScalarsPerLdgB>
214 
217  // The pointer.
218  half,
219  // The tile has size KxN in GEMM's terminology.
220  Shape<GemmConfig_::kStages,
221  GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD,
222  GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>,
223  // The threads are distributed as (threads / K) x K (the traits may reorganize).
224  typename GlobalTileTraits::Threads,
225  // The number of scalars per STS (STS.32 or STS.128, etc).
226  2,
227  // The skew to avoid bank conflicts added in the tile W dimension.
228  128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2>
230 
233  // The pointer.
234  half const,
235  // The output tile size.
236  typename GemmConfig_::OutputTile,
237  // The number of warps.
238  typename GemmConfig_::Warps,
239  // The number of threads per warp.
240  typename GemmConfig_::MultiplyAdd::ThreadsPerWarp,
241  // The shape of the FMA instruction.
242  typename GemmConfig_::InstructionShape,
243  // The number of stages.
244  GemmConfig_::kStages,
245  // The number of scalars per LDS.
246  8,
247  // The skew.
248  SharedStoreTileTraits::kSkew>
250 };
251 
253 
254 template <
256  MatrixLayout::Kind kLayoutA_,
258  MatrixLayout::Kind kLayoutB_,
260  typename OutputTile_,
262  typename EpilogueFunctor_,
264  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
266  int kScalarsPerLdgA_ = 2,
268  int kScalarsPerLdgB_ = 2,
270  typename Index_ = int>
279 
284  typedef typename HgemmTransformerA<GemmTileTraitsHelperA::kLayout,
287  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
288  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
295 
299  // The default transformer for B.
300  typedef typename HgemmTransformerB<GemmTileTraitsHelperB::kLayout,
303  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
304  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
311 
313  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
314  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
321  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
322  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
328 
333 
338 };
339 
341 
342 template <
344  MatrixLayout::Kind kLayoutA_,
346  MatrixLayout::Kind kLayoutB_,
348  typename OutputTile_ = Shape<8, 128, 128>,
350  typename EpilogueFunctor_ = LinearScaling<half>,
352  typename AccumulatorsPerThread_ = Shape<8, 8, 16>,
354  int kScalarsPerLdgA_ = 2,
356  int kScalarsPerLdgB_ = 2,
358  typename Index_ = int,
360  typename Helper_ = HgemmTraitsHelper<kLayoutA_,
361  kLayoutB_,
362  OutputTile_,
363  EpilogueFunctor_,
364  AccumulatorsPerThread_,
365  kScalarsPerLdgA_,
366  kScalarsPerLdgB_,
367  Index_> >
368 struct HgemmTraits : public GemmTraits<
369  // The config.
370  typename Helper_::GemmConfig,
371  // The stream to load A from global memory to shared memory.
372  typename Helper_::GlobalLoadStreamA,
373  // The stream to load B from global memory to shared memory.
374  typename Helper_::GlobalLoadStreamB,
375  // The stream to load A from shared memory.
376  typename Helper_::SharedLoadStreamA,
377  // The stream to load B from shared memory.
378  typename Helper_::SharedLoadStreamB,
379  // The epilogue.
380  typename Helper_::Epilogue,
381  // The block swizzle to reorganize the grid.
382  IdentityBlockSwizzle,
383  // The index.
384  Index_,
385  // The tool used to clear accumulators.
386  typename Helper_::ClearAccumulators> {};
387 
389 
390 } // namespace gemm
391 } // namespace cutlass
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The iterator to load A from global memory.
Definition: hgemm_traits.h:282
+
Definition: load_store.h:42
+
HgemmSwizzle< Iterator_ > Transformer
Definition: hgemm_traits.h:119
+
Definition: convert.h:33
+
Definition: gemm_shared_tile.h:129
+ +
Definition: gemm_epilogue.h:53
+
Defines iterators for efficiently loading and storing to global memory.
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The iterator to load B from global memory.
Definition: hgemm_traits.h:298
+
ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
The object to clear accumulators.
Definition: hgemm_traits.h:332
+
Defines structural properties of complete GEMM computation.
+
TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: hgemm_traits.h:291
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: hgemm_traits.h:294
+
HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^N.
Definition: hgemm_traits.h:213
+
Definition: hgemm_traits.h:95
+
GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
The base config.
Definition: hgemm_traits.h:198
+
SharedLoadStream< SharedLoadIteratorA > SharedLoadStreamA
The stream to load A from shared memory.
Definition: hgemm_traits.h:319
+
Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
Definition: hgemm_traits.h:99
+
Definition: hgemm_traits.h:368
+
HgemmSwizzle< Iterator_ > Transformer
Definition: hgemm_traits.h:104
+
Definition: tile_iterator.h:62
+
Definition: gemm_shared_tile.h:198
+
TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: hgemm_traits.h:325
+
Definition: gemm_global_tile.h:159
+
GemmEpilogue< GemmEpilogueTraits > Epilogue
The epilogue.
Definition: hgemm_traits.h:337
+
HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
The default transformer for A.
Definition: hgemm_traits.h:285
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Definition: gemm_global_stream.h:161
+
Definition: gemm_traits.h:273
+
Definition: hgemm_traits.h:125
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
SharedLoadStream< SharedLoadIteratorB > SharedLoadStreamB
The stream to load B from shared memory.
Definition: hgemm_traits.h:327
+
Definition: hgemm_traits.h:110
+
GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
The base config.
Definition: hgemm_traits.h:133
+
TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: hgemm_traits.h:317
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
The traits class for the epilogue.
Definition: hgemm_traits.h:335
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: matrix_traits.h:36
+ +
Definition: gemm_shared_stream.h:44
+
Defines a type for restructuring a tile.
+
Specialization implementing multiply-add operation on half-precision floating point fragments...
+
Definition: gemm_traits.h:79
+
Transposes a tile of 16b elements. Used by HGEMM to construct a K-strided layout in shared memory for...
+
Definition: gemm_traits.h:137
+
GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for B^N.
Definition: hgemm_traits.h:249
+
Definition: matrix_traits.h:43
+
HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
The HGEMM config.
Definition: hgemm_traits.h:274
+
Definition: hgemm_traits.h:190
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: hgemm_traits.h:310
+
GemmConfig::MultiplyAdd MultiplyAdd
The functor to do the multiply-add in the main loop.
Definition: hgemm_traits.h:330
+
HgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
The GEMM config for B.
Definition: hgemm_traits.h:278
+
Definition: gemm_traits.h:428
+
Definition: hgemm_global_tile.h:48
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Definition: gemm_epilogue_traits.h:300
+
GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
The traits class to build the iterator to load from shared memory for A^T.
Definition: hgemm_traits.h:184
+
HgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
The GEMM config for A.
Definition: hgemm_traits.h:276
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:36
+
HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
Definition: hgemm_traits.h:301
+ +
Definition: hgemm_traits.h:271
+
HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^T.
Definition: hgemm_traits.h:148
+
Tile traits used to construct global tile iterator for HGEMM. This is intended to partition the threa...
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Definition: convert.h:38
+
Definition: matrix_traits.h:43
+
Implements a software-pipelined efficient GEMM.
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines structural properties of the GEMM epilogue.
+
Definition: hgemm_swizzle.h:40
+
Defines conversion operations among Fragments of different base type.
+
Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
Definition: hgemm_traits.h:114
+
Definition: hgemm_traits.h:57
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: hgemm_traits.h:307
+
+ + + + diff --git a/docs/generated-html/hierarchy.html b/docs/generated-html/hierarchy.html new file mode 100644 index 0000000000..25ba6bdabe --- /dev/null +++ b/docs/generated-html/hierarchy.html @@ -0,0 +1,411 @@ + + + + + + + +Cutlass: Class Hierarchy + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Class Hierarchy
+
+
+
This inheritance list is sorted roughly, but not completely, alphabetically:
+
[detail level 123]

 Ccutlass::platform::aligned_chunk< Align >
 Ccutlass::platform::aligned_storage< Len, Align >Std::aligned_storage
 Ccutlass::AlignedStruct< kAlignment_ >
 Ccutlass::AlignedStruct< kVectorSize >
 Ccutlass::platform::alignment_of< value_t >Std::alignment_of
 Ccutlass::platform::alignment_of< double2 >
 Ccutlass::platform::alignment_of< double4 >
 Ccutlass::platform::alignment_of< float4 >
 Ccutlass::platform::alignment_of< int4 >
 Ccutlass::platform::alignment_of< long4 >
 Ccutlass::platform::alignment_of< longlong2 >
 Ccutlass::platform::alignment_of< longlong4 >
 Ccutlass::platform::alignment_of< uint4 >
 Ccutlass::platform::alignment_of< ulong4 >
 Ccutlass::platform::alignment_of< ulonglong2 >
 Ccutlass::platform::alignment_of< ulonglong4 >
 Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >
 Ccutlass::ComputeOffsetFromShape< Shape_ >Compute the offset for the given coordinates in a cube
 Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 Ccutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 Ccutlass::ComputeOffsetFromStrides< Strides_ >Compute the offset for the given coordinates in a cube
 Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >Compute the offset for the given coordinates in a cube with one channel and a depth of 1
 Ccutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >Compute the offset for the given coordinates in a cube with a depth of 1
 Ccutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >Decompose threadId.x into coordinate of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_
 Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >Specialization for D=1 and C=1
 Ccutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >Specialization for D=1
 Ccutlass::platform::conditional< B, T, F >Std::conditional (true specialization)
 Ccutlass::platform::conditional< false, T, F >Std::conditional (false specialization)
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIteratorA const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates
 Ccutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
 Ccutlass::Convert< InputFragment_, OutputFragment_ >
 Ccutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
 Ccutlass::Coord< N_ >Statically-sized array specifying Coords within a tensor
 Ccutlass::Coord< 4 >
 Ccutlass::Coord< Rank >
 Ccutlass::Copy< Fragment_ >
 Ccutlass::platform::default_delete< T >Default deleter
 Ccutlass::platform::default_delete< T[]>Partial specialization for deleting array types
 Ccutlass::divide_assert< Dividend, Divisor >
 Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
 Ccutlass::platform::enable_if< C, T >Std::enable_if (true specialization)
 Ccutlass::platform::enable_if< false, T >Std::enable_if (false specialization)
 Ccutlass::Extent< T >Returns the extent of a scalar or vector
 Ccutlass::Extent< Vector< T, Lanes > >Returns the number of lanes of a vector if need be
 Ccutlass::Extent< Vector< T, Lanes > const >Returns the number of lanes of a vector if need be
 Ccutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
 Ccutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >A template defining Fragment Iterator Concept
 Ccutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::gemm::FragmentMultiplyAdd< Scalar_ >
 Ccutlass::gemm::FragmentMultiplyAdd< half >
 Ccutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
 Ccutlass::gemm::Gemm< GemmTraits_ >
 Ccutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
 Ccutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
 Ccutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
 Ccutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
 Ccutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
 Ccutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
 Ccutlass::gemm::GemmDesc< Scalar_, Index_ >
 Ccutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
 Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 Ccutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
 Ccutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >
 Ccutlass::GemmOperandGemm operand - D = A * B + C
 Ccutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >Helper to describe attributes of GEMM matrix operands
 Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
 Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >
 Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
 Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
 Ccutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
 Ccutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
 Ccutlass::gemm::GetExtent< kOperand_, Tile_ >
 Ccutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >
 Ccutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamAssemble the global load streams for A/B
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
 Ccutlass::platform::greater< T >Std::greater
 Ccutlass::gemm::HgemmSwizzle< GlobalIterator_ >
 Ccutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
 Ccutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ >
 Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::IdentityDescribes identity elements
 Ccutlass::gemm::IdentityBlockSwizzle
 Ccutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
 Ccutlass::gemm::IgemmEpilogueScalar< int >
 Ccutlass::gemm::IgemmFloatToInt8Converter< kElements_ >
 Ccutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >
 Ccutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
 Ccutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >
 Ccutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
 Ccutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >
 Ccutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >
 Ccutlass::gemm::IgemmSwizzle< GlobalIterator_ >
 Ccutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
 Ccutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
 Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 Ccutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 Ccutlass::platform::integral_constant< value_t, V >Std::integral_constant
 Ccutlass::platform::integral_constant< bool, V >
 Ccutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
 Ccutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
 Ccutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
 Ccutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
 Ccutlass::platform::is_base_of_helper< BaseT, DerivedT >Helper for std::is_base_of
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::IteratorAn iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates
 Ccutlass::IteratorAdvanceSpecifies dimension in which post-increment accesses advance
 Ccutlass::IteratorFragmentSpecifies whether iterator storage fragment consists of Scalar values or WMMA matrix
 Ccutlass::platform::less< T >Std::less
 Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >Functor to compute linear combination of fragments
 Ccutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
 Ccutlass::Load< double, 2, Memory_, true, 16 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
 Ccutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
 Ccutlass::log2_down< N, CurrentVal, Count >
 Ccutlass::log2_down< N, 1, Count >
 Ccutlass::log2_up< N, CurrentVal, Count >
 Ccutlass::log2_up< N, 1, Count >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage
 Ccutlass::MatrixLayoutDescribes layouts of matrices
 Ccutlass::MemorySpaceEnum to specify which memory space data resides in
 Ccutlass::platform::nullptr_tStd::nullptr_t
 Ccutlass::platform::alignment_of< value_t >::pad
 Ccutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
 CParams
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ParamsThe params
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::ParamsThe params
 Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ParamsParameters to the iterator
 Ccutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ParamsThe params
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ParamsThe params
 Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::ParamsThe params
 Ccutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::ParamsThe parameters
 Ccutlass::platform::plus< T >Platform::plus
 Ccutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >Adapter to enable random access to predicates via logical coordinate within a tile
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >Statically sized array of bits implementing Predicate Vector Concept
 Ccutlass::PredicateVector< Base::Iterations::kW >
 Ccutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount >
 Ccutlass::gemm::ProjectOperand< operand, Kstrided >
 Ccutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >Project A operand - (0, K, M)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >Project B operand - (0, K, N)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kC, true >Project C operand - (0, N, M)
 Ccutlass::gemm::ProjectOperand< GemmOperand::kD, true >Project D operand - (0, N, M)
 Ccutlass::platform::remove_const< T >Std::remove_const (non-const specialization)
 Ccutlass::platform::remove_const< const T >Std::remove_const (const specialization)
 Ccutlass::platform::remove_cv< T >Std::remove_cv
 Ccutlass::platform::remove_volatile< T >Std::remove_volatile (non-volatile specialization)
 Ccutlass::platform::remove_volatile< volatile T >Std::remove_volatile (volatile specialization)
 Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
 Ccutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
 Ccutlass::ReshapeTile< Tile_, kAccessSize_, bool >
 Ccutlass::ReshapeTile< Tile_, kAccessSize_, true >
 Ccutlass::Shape< kD_, kH_, kW_, kC_ >A Shape implementing Layout Concept describing the dimensions of a cube
 Ccutlass::ShapeAdd< A_, B_ >
 Ccutlass::ShapeCount< Shape >Compute derived counts of a Layout Concept based class
 Ccutlass::ShapeDiv< A_, B_ >
 Ccutlass::ShapeMax< A_, B_ >
 Ccutlass::ShapeMin< A_, B_ >
 Ccutlass::ShapeMul< A_, B_ >
 Ccutlass::ShapeScale< A_, kScale_ >
 Ccutlass::ShapeStrides< Shape_ >
 Ccutlass::ShapeSub< A_, B_ >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamAssemble the shared load stream for A/B
 Ccutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
 Ccutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorageThe shared storage
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorageThe shared memory to swizzle the data in the epilogue
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorageThe storage in shared memory
 Ccutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorageThe storage in shared memory needed by that stream
 Ccutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
 Ccutlass::sqrt_est< N >
 Ccutlass::StorageType< kAlignment_ >
 Ccutlass::StorageType< 1 >
 Ccutlass::StorageType< 2 >
 Ccutlass::StorageType< 4 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
 Ccutlass::Store< double, 2, Memory_, true, 16 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
 Ccutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >
 Ccutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorageThe shared memory storage to exchange data
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >
 Ccutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >
 Ccutlass::TensorRef< Storage_, Rank_ >Structure modeling a pointer and stride into a tensor
 Ccutlass::TensorRef< T, 4 >
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >Template performing matrix multiply-add operation within a thread
 Ccutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset
 Ccutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset
 Ccutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffsetComputes the thread offset in (H, W) based on thread ID
 Ccutlass::TiledThreadOffset< ThreadShape >Basic thread offset function computed from a thread shape
 Ccutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >Iterator for accessing a stripmined tile in memory
 Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
 Ccutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
 Ccutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >A template defining Tile Traits Concept
 Ccutlass::TileTraitsContiguousMajor< Tile_, Threads >
 Ccutlass::TileTraitsStandard< Tile_, Threads >Chooses 'best' shape to enable warp raking along contiguous dimension if possible
 Ccutlass::TileTraitsStrideMajor< Tile_, Threads >
 Ccutlass::TileTraitsWarpRake< Tile_, Threads >Tiling in which warps rake across the contiguous dimension
 Ccutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIteratorIterator that always returns true
 Ccutlass::TrivialPredicateTileAdapterAlways returns true predicate
 Ccutlass::platform::unique_ptr< T, Deleter >Std::unique_ptr
 Ccutlass::Vector< Scalar_, kLanes_ >
 Ccutlass::Vector< half, kLanes_ >
 Ccutlass::Vectorize< Element_, kLanes_ >
 Ccutlass::Vectorize< Element_, 1 >
 Ccutlass::VectorTraits< T >Traits describing properties of vectors and scalar-as-vectors
 Ccutlass::VectorTraits< Vector< T, Lanes > >Partial specialization for actual cutlass::Vector
 Ccutlass::VectorTraits< Vector< T, Lanes > const >Partial specialization for actual cutlass::Vector
+
+
+ + + + diff --git a/docs/generated-html/identity__block__swizzle_8h.html b/docs/generated-html/identity__block__swizzle_8h.html new file mode 100644 index 0000000000..3da48ad490 --- /dev/null +++ b/docs/generated-html/identity__block__swizzle_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: identity_block_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
identity_block_swizzle.h File Reference
+
+
+ +

Defines functors for mapping blockIdx to partitions of the GEMM computation. +More...

+ +

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::IdentityBlockSwizzle
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+

Detailed Description

+

Currently, we only implement an identity mapping.

+
+ + + + diff --git a/docs/generated-html/identity__block__swizzle_8h_source.html b/docs/generated-html/identity__block__swizzle_8h_source.html new file mode 100644 index 0000000000..fb44c26beb --- /dev/null +++ b/docs/generated-html/identity__block__swizzle_8h_source.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: identity_block_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
identity_block_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
39  CUTLASS_DEVICE IdentityBlockSwizzle() {}
40 
42  CUTLASS_DEVICE dim3 swizzle() { return blockIdx; }
43 };
44 
46 
47 } // namespace gemm
48 } // namespace cutlass
Definition: convert.h:33
+
CUTLASS_DEVICE IdentityBlockSwizzle()
Ctor.
Definition: identity_block_swizzle.h:39
+
CUTLASS_DEVICE dim3 swizzle()
Swizzle the block index.
Definition: identity_block_swizzle.h:42
+
Definition: identity_block_swizzle.h:37
+
+ + + + diff --git a/docs/generated-html/igemm__epilogue_8h.html b/docs/generated-html/igemm__epilogue_8h.html new file mode 100644 index 0000000000..9b5e5ccf05 --- /dev/null +++ b/docs/generated-html/igemm__epilogue_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: igemm_epilogue.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_epilogue.h File Reference
+
+ + + + + diff --git a/docs/generated-html/igemm__epilogue_8h_source.html b/docs/generated-html/igemm__epilogue_8h_source.html new file mode 100644 index 0000000000..bfef820ae9 --- /dev/null +++ b/docs/generated-html/igemm__epilogue_8h_source.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: igemm_epilogue.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_epilogue.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/convert.h>
32 #include <cutlass/fragment.h>
36 #include <cutlass/reshape_tile.h>
37 #include <cutlass/tile_iterator.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 template <int kElements_>
50 
51  // We are packing 4 floats into int32 registers so we need kElements to be multiple of 4.
52  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
53 
55  CUTLASS_DEVICE IgemmFloatToInt8Converter() {}
56 
58  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
59  transform(src, 0, dst);
60  }
61 
63  template <typename Fragment_>
64  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
65  // The inputs.
66  float4 const* src_f4 = reinterpret_cast<float4 const*>(&src[0]);
67  // The outputs.
68  int* dst_int = reinterpret_cast<int*>(&dst[0]);
69 
70  // Iterate over the floats and pack them together to produce ints.
71  for (int i = 0; i < kElements_ / 4; ++i) {
72  // Read the float4.
73  float4 f4 = src_f4[i];
74 
75  // Clamp the 4 elements of the floats to the [-128, +127] range.
76  float x = fmaxf(-128.f, fminf(127.f, f4.x));
77  float y = fmaxf(-128.f, fminf(127.f, f4.y));
78  float z = fmaxf(-128.f, fminf(127.f, f4.z));
79  float w = fmaxf(-128.f, fminf(127.f, f4.w));
80 
81  // Convert to integers.
82  int ix = (int)x;
83  int iy = (int)y;
84  int iz = (int)z;
85  int iw = (int)w;
86 
87  // Extract the lower bytes to build an int32 with 4 int8.
88  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(ix) : "r"(iy));
89  asm volatile("prmt.b32 %0, %0, %1, 0x1140;" : "+r"(iz) : "r"(iw));
90  asm volatile("prmt.b32 %0, %0, %1, 0x5410;" : "+r"(ix) : "r"(iz));
91 
92  // Store the int.
93  dst_int[i] = ix;
94  }
95  }
96 };
97 
99 
100 template <typename InputScalar_, typename OutputFragment_>
103 };
104 
105 template <int kElements_>
106 struct IgemmGlobalStoreTransformer<float, Fragment<int8_t, kElements_> > {
108 };
109 
111 
112 template <int kElements_>
118 
119  // We are unpacking 4 int8s from int32.
120  static_assert(kElements_ % 4 == 0, "kElements must be multiple of 4");
121 
123  CUTLASS_DEVICE IgemmInt8ToFloatConverter() {}
124 
126  CUTLASS_DEVICE void transform(InputFragment const& src, OutputFragment& dst) {
127  transform(src, 0, dst);
128  }
129 
131  template <typename Fragment_>
132  CUTLASS_DEVICE void transform(Fragment_ const& src, int offset, OutputFragment& dst) {
133  // The inputs.
134  int const* src_int = reinterpret_cast<int const*>(&src[0]);
135  // The outputs.
136  float4* dst_f4 = reinterpret_cast<float4*>(&dst[0]);
137 
138  // Iterate over the int8 and unpack them together to produce floats.
139  for (int i = 0; i < kElements_ / 4; ++i) {
140  // Read the int.
141  int ix, iy, iz, iw = src_int[i];
142 
143  // Extract the 4 bytes.
144  asm volatile("prmt.b32 %0, 0x0, %1, 0x4440;" : "=r"(ix) : "r"(iw));
145  asm volatile("prmt.b32 %0, 0x0, %1, 0x4441;" : "=r"(iy) : "r"(iw));
146  asm volatile("prmt.b32 %0, 0x0, %1, 0x4442;" : "=r"(iz) : "r"(iw));
147  asm volatile("prmt.b32 %0, 0x0, %1, 0x4443;" : "=r"(iw) : "r"(iw));
148 
149  // The floats.
150  float fx, fy, fz, fw;
151 
152  // Convert to floats (make sure we generate I2F.F32.S8).
153  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fx) : "r"(ix));
154  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fy) : "r"(iy));
155  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fz) : "r"(iz));
156  asm volatile("cvt.rn.f32.s8 %0, %1;" : "=f"(fw) : "r"(iw));
157 
158  // Store the float4.
159  dst_f4[i] = make_float4(fx, fy, fz, fw);
160  }
161  }
162 };
163 
165 
166 template <typename InputFragment_, typename OutputScalar_>
169 };
170 
171 template <int kElements_>
172 struct IgemmGlobalLoadTransformer<Fragment<int8_t, kElements_>, float> {
174 };
175 
177 
178 template <typename InputScalar_, typename OutputFragment_>
181 };
182 
184 
185 template <typename IgemmConfig_, typename EpilogueFunctor_, typename Index_>
187  : public GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> {
191  typedef IgemmConfig_ IgemmConfig;
192 
194  typedef typename Base::Scalar Scalar;
196  typedef typename Base::Iterations Iterations;
198  typedef typename Base::Delta Delta;
199 
207  typedef
209 
217  typedef
219 
232  SharedStoreFragmentD>::Transformer
242 };
243 
245 
246 template <
248  typename IgemmConfig_,
250  typename EpilogueFunctor_,
252  typename Index_ = int,
256  // The output tile.
257  typename IgemmConfig_::OutputTile,
258  // The accumulators.
259  typename IgemmConfig_::Accumulators,
260  // The global iterator for C.
261  typename Helper_::GlobalLoadIteratorC,
262  // The transformer for C.
263  typename Helper_::GlobalTransformerC,
264  // The transformer for D.
265  typename Helper_::GlobalTransformerD,
266  // The global iterator for D.
267  typename Helper_::GlobalStoreIteratorD,
268  // The iterator to store D to shared memory.
269  typename Helper_::SharedStoreIteratorD,
270  // The shared store transformer for D.
271  typename Helper_::SharedStoreTransformerD,
272  // The iterator to load D from shared memory.
273  typename Helper_::SharedLoadIteratorD,
274  // The iterations.
275  typename Helper_::Iterations,
276  // The strides between iterations.
277  typename Helper_::Delta,
278  // The functor to be used in the epilogue.
279  EpilogueFunctor_,
280  // The index.
281  Index_> {
283  static bool const kInt8Output =
285 };
286 
288 
289 template <typename GemmEpilogueTraits_, bool = GemmEpilogueTraits_::kInt8Output>
290 struct IgemmEpilogue : public GemmEpilogue<GemmEpilogueTraits_> {
293 
295  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
296  typename Base::SharedStorage& shared_storage_,
297  typename Base::Index m_,
298  typename Base::Index n_)
299  : Base(params_, shared_storage_, m_, n_) {}
300 };
301 
303 
304 template <typename GemmEpilogueTraits_>
305 struct IgemmEpilogue<GemmEpilogueTraits_, true> : public GemmEpilogue<GemmEpilogueTraits_> {
308 
310  CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const& params_,
311  typename Base::SharedStorage& shared_storage_,
312  typename Base::Index m_,
313  typename Base::Index n_)
314  : Base(params_, shared_storage_, m_, n_) {}
315 };
316 
318 
319 } // namespace gemm
320 } // namespace cutlass
Definition: gemm_global_tile.h:116
+
Definition: igemm_epilogue.h:255
+
Definition: load_store.h:42
+
Base::Delta Delta
The iterations strides.
Definition: igemm_epilogue.h:198
+
Base::Fragment Fragment
Fragment definition.
Definition: tile_iterator.h:682
+
Base::SharedStoreTileTraits SharedStoreTileTraits
The traits class for the shared iterator to store D to shared memory.
Definition: igemm_epilogue.h:221
+
IgemmGlobalStoreTransformer< Scalar, GlobalFragmentD >::Transformer GlobalTransformerD
The transformer from accumulators to shared memory fragments.
Definition: igemm_epilogue.h:218
+
Definition: convert.h:33
+
Base::SharedLoadTileTraits SharedLoadTileTraits
The traits class for the shared iterator to load D from shared memory.
Definition: igemm_epilogue.h:235
+
TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
The shared iterator to load D from shared memory.
Definition: igemm_epilogue.h:241
+
Definition: gemm_epilogue_traits.h:171
+
GemmEpilogue< GemmEpilogueTraits_ > Base
The base class.
Definition: igemm_epilogue.h:292
+
Traits::Params Params
The params.
Definition: gemm_epilogue.h:57
+
Definition: gemm_epilogue.h:53
+
Definition: igemm_epilogue.h:167
+
std::is_same (false specialization)
Definition: platform.h:412
+
Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
+
CUTLASS_DEVICE IgemmInt8ToFloatConverter()
Ctor.
Definition: igemm_epilogue.h:123
+
SharedStoreIteratorD::Fragment SharedStoreFragmentD
The fragment that needs to be passed to that store iterator.
Definition: igemm_epilogue.h:229
+
EpilogueFunctor_::Scalar Scalar
The scalar.
Definition: gemm_epilogue_traits.h:173
+
Definition: igemm_epilogue.h:186
+
Definition: load_store.h:43
+
Fragment< int8_t, kElements_ > InputFragment
The input fragment.
Definition: igemm_epilogue.h:115
+
Definition: igemm_epilogue.h:290
+
Definition: igemm_epilogue.h:45
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:64
+
Traits::SharedStorage SharedStorage
The shared storage.
Definition: gemm_epilogue.h:59
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Definition: tile_iterator.h:62
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:126
+
Base::Scalar Scalar
The scalar type of the epilogue.
Definition: igemm_epilogue.h:194
+
GlobalLoadIteratorC::Fragment GlobalFragmentC
The fragment that needs to be produced by the load iterator.
Definition: igemm_epilogue.h:205
+
CUTLASS_DEVICE void transform(InputFragment const &src, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:58
+
Fragment< int8_t, kElements_ > OutputFragment
The output fragment.
Definition: igemm_epilogue.h:49
+
GemmGlobalIteratorCd< GlobalStoreTileTraits > GlobalStoreIteratorD
The global iterator to store D.
Definition: igemm_epilogue.h:213
+
IgemmSharedStoreTransformer< typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD >::Transformer SharedStoreTransformerD
The transformer from accumulators to shared memory fragments.
Definition: igemm_epilogue.h:233
+
static bool const kInt8Output
Do we output in int8?
Definition: igemm_epilogue.h:283
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
Definition: igemm_epilogue.h:180
+
GemmEpilogue< GemmEpilogueTraits_ > Base
The base class.
Definition: igemm_epilogue.h:307
+
Defines a type for restructuring a tile.
+
Base::GlobalLoadTileTraits GlobalLoadTileTraits
The traits class for the iterator.
Definition: igemm_epilogue.h:201
+
Fragment< float, kElements_ > OutputFragment
The output fragment.
Definition: igemm_epilogue.h:117
+
GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Base
The base class.
Definition: igemm_epilogue.h:189
+
CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
Ctor.
Definition: igemm_epilogue.h:295
+
Definition: gemm_shared_tile.h:335
+
Traits::Index Index
The index.
Definition: gemm_epilogue.h:93
+
GlobalStoreIteratorD::Fragment GlobalFragmentD
The fragment that needs to be passed to that store iterator.
Definition: igemm_epilogue.h:215
+
GemmGlobalIteratorCd< GlobalLoadTileTraits > GlobalLoadIteratorC
The global iterator to load C.
Definition: igemm_epilogue.h:203
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
IgemmConfig_ IgemmConfig
The config.
Definition: igemm_epilogue.h:191
+
CUTLASS_DEVICE IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
Ctor.
Definition: igemm_epilogue.h:310
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
CUTLASS_DEVICE IgemmFloatToInt8Converter()
Ctor.
Definition: igemm_epilogue.h:55
+
Element_ Element
The element.
Definition: fragment.h:108
+
Fragment< float, kElements_ > InputFragment
The input fragment.
Definition: igemm_epilogue.h:47
+
Definition: gemm_epilogue_traits.h:70
+
Definition: gemm_global_tile.h:348
+
Definition: igemm_epilogue.h:179
+
Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
+
Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
The fragment.
Definition: tile_iterator.h:154
+
Definition: convert.h:38
+
IgemmFloatToInt8Converter< kElements_ > Transformer
Definition: igemm_epilogue.h:107
+
Base::Iterations Iterations
The iterations.
Definition: igemm_epilogue.h:196
+
IgemmGlobalLoadTransformer< GlobalFragmentC, Scalar >::Transformer GlobalTransformerC
The transformer from loaded data to math fragment.
Definition: igemm_epilogue.h:208
+
Base::GlobalStoreTileTraits GlobalStoreTileTraits
The traits class for the iterator.
Definition: igemm_epilogue.h:211
+
Convert< InputFragment_, Fragment< OutputScalar_, InputFragment_::kElements > > Transformer
Definition: igemm_epilogue.h:168
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:266
+
CUTLASS_DEVICE void transform(Fragment_ const &src, int offset, OutputFragment &dst)
Transform a fragment.
Definition: igemm_epilogue.h:132
+
Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
Definition: igemm_epilogue.h:102
+
Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
+
Definition: igemm_epilogue.h:101
+
TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal > SharedStoreIteratorD
The shared iterator to store D to shared memory.
Definition: igemm_epilogue.h:227
+
IgemmInt8ToFloatConverter< kElements_ > Transformer
Definition: igemm_epilogue.h:173
+
Defines conversion operations among Fragments of different base type.
+
Definition: igemm_epilogue.h:113
+
platform::remove_const< Scalar_ >::type Scalar
The scalar.
Definition: gemm_shared_tile.h:337
+
Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
+
Definition: gemm_shared_tile.h:264
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
+ + + + diff --git a/docs/generated-html/igemm__global__tile_8h.html b/docs/generated-html/igemm__global__tile_8h.html new file mode 100644 index 0000000000..d6a6801684 --- /dev/null +++ b/docs/generated-html/igemm__global__tile_8h.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: igemm_global_tile.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_global_tile.h File Reference
+
+
+ +

Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load each. Applies permute transformation to construct 'interleaved K-strided' data layout in which 4-element dot products from the same K index are arranged in consecutive locations within shared memory. +More...

+ +

Go to the source code of this file.

+ + + + + + + +

+Classes

struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
 
struct  cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+

Detailed Description

+

Supports efficient loads from shared memory to target the DP4A instruction.

+
+ + + + diff --git a/docs/generated-html/igemm__global__tile_8h_source.html b/docs/generated-html/igemm__global__tile_8h_source.html new file mode 100644 index 0000000000..df086169df --- /dev/null +++ b/docs/generated-html/igemm__global__tile_8h_source.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: igemm_global_tile.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_global_tile.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
33 #pragma once
34 
35 #include <cutlass/coord.h>
37 #include <cutlass/matrix_traits.h>
38 
39 namespace cutlass {
40 namespace gemm {
41 
43 
44 template <GemmOperand::Kind kOperand_,
45  MatrixLayout::Kind kLayout_,
46  typename Scalar_,
47  typename Tile_,
48  typename Threads_,
49  int kAccessSize_>
51  // Which GEMM operand?
52  kOperand_,
53  // The layout.
54  kLayout_,
55  // The scalar.
56  Scalar_,
57  // The tile.
58  Tile_,
59  // The threads.
60  Threads_,
61  // The number of scalars per LDG/STG.
62  kAccessSize_> {
66  typedef typename Base::Threads Threads;
70  typedef Shape<Base::Tile::kH / Base::Threads::kH / 4,
71  4,
72  Base::Tile::kW / Base::Threads::kW,
73  Base::Tile::kC / Base::kAccessSize>
75 
77  struct ThreadOffset {
79  Coord<4> operator()() const {
80  int thread_offset_h = threadIdx.x / Threads::kW * ThreadsDelta::kH;
81  int thread_offset_w = threadIdx.x % Threads::kW * ThreadsDelta::kW;
82 
83  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
84  }
85  };
86 
87  public:
90 };
91 
93 
94 } // namespace gemm
95 } // namespace cutlass
Computes the thread offset in (H, W) based on thread ID.
Definition: igemm_global_tile.h:77
+
Definition: convert.h:33
+
Defines iterators for efficiently loading and storing to global memory.
+
Definition: gemm_global_tile.h:70
+
A Coord is a coordinate of arbitrary rank into a tensor or matrix.
+
CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
Helper to make a 1-element coordinate.
Definition: coord.h:241
+
Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
The strides in each dimension between different loads/stores.
Definition: igemm_global_tile.h:68
+
static int const kH
The height of the cube.
Definition: shape.h:68
+
GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
The base class.
Definition: igemm_global_tile.h:64
+
Shape< Base::Tile::kH/Base::Threads::kH/4, 4, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
The number of iterations needed to load/store the tile.
Definition: igemm_global_tile.h:74
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
Definition: igemm_global_tile.h:50
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+ +
static int const kW
The width of the cube.
Definition: shape.h:70
+
Kind
Definition: matrix_traits.h:36
+
static int const kAccessSize
The number of scalars per LDG/STG.
Definition: gemm_global_tile.h:80
+
Kind
Definition: matrix_traits.h:43
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines properties of matrices used to denote layout and operands to GEMM kernels.
+
Shape< 1, 4, Base::Tile::kC > ThreadsDelta
The threads strides.
Definition: igemm_global_tile.h:89
+
CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
Definition: igemm_global_tile.h:79
+
Base::Threads Threads
The threads.
Definition: igemm_global_tile.h:66
+
+ + + + diff --git a/docs/generated-html/igemm__multiply__add_8h.html b/docs/generated-html/igemm__multiply__add_8h.html new file mode 100644 index 0000000000..266cb5f16c --- /dev/null +++ b/docs/generated-html/igemm__multiply__add_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: igemm_multiply_add.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_multiply_add.h File Reference
+
+
+ +

Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction. +More...

+ +

Go to the source code of this file.

+ + + + + +

+Classes

struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
 Template performing matrix multiply-add operation within a thread. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__multiply__add_8h_source.html b/docs/generated-html/igemm__multiply__add_8h_source.html new file mode 100644 index 0000000000..414c2ce175 --- /dev/null +++ b/docs/generated-html/igemm__multiply__add_8h_source.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: igemm_multiply_add.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_multiply_add.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
29 #pragma once
30 
31 #include <cutlass/fragment.h>
32 
34 
35 namespace cutlass {
36 namespace gemm {
37 
39 
41 template <typename AccumulatorsPerThread_, typename ThreadsPerWarp_>
42 struct ThreadMultiplyAdd<AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int> {
46  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
48  typedef ThreadsPerWarp_ ThreadsPerWarp;
52  typedef int8_t ScalarA;
56  typedef int8_t ScalarB;
60  typedef int ScalarC;
63 
65  CUTLASS_DEVICE ThreadMultiplyAdd() {}
66 
68  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
69  FragmentB const& b,
70  Accumulators const& c,
71  Accumulators& d) {
72  // The inputs.
73  int const* a_int = reinterpret_cast<int const*>(&a[0]);
74  int const* b_int = reinterpret_cast<int const*>(&b[0]);
75 
76  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
77  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
78  asm volatile("dp4a.s32.s32 %0, %1, %2, %3;"
79  : "=r"(d[j * AccumulatorsPerThread::kW + i])
80  : "r"(a_int[i]), "r"(b_int[j]), "r"(c[j * AccumulatorsPerThread::kW + i]));
81  }
82  }
83  }
84 };
85 
87 
88 } // namespace gemm
89 } // namespace cutlass
+
Definition: convert.h:33
+
Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
Definition: shape.h:119
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Template implementing matrix multiply-add operations on fragments.
+
Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
The accumulators.
Definition: igemm_multiply_add.h:62
+
ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
The number of accumulators per warp.
Definition: igemm_multiply_add.h:50
+
Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
The fragment for B.
Definition: igemm_multiply_add.h:58
+ +
Shape< 4, 1, 1 > InstructionShape
The shape of the instruction.
Definition: igemm_multiply_add.h:44
+
ThreadsPerWarp_ ThreadsPerWarp
The number of threads per warp.
Definition: igemm_multiply_add.h:48
+
AccumulatorsPerThread_ AccumulatorsPerThread
The number of accumulators per thread.
Definition: igemm_multiply_add.h:46
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
The fragment for A.
Definition: igemm_multiply_add.h:54
+ + +
CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
Multiply : d = a*b + c.
Definition: igemm_multiply_add.h:68
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
+ + + + diff --git a/docs/generated-html/igemm__swizzle_8h.html b/docs/generated-html/igemm__swizzle_8h.html new file mode 100644 index 0000000000..a631d215c0 --- /dev/null +++ b/docs/generated-html/igemm__swizzle_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: igemm_swizzle.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_swizzle.h File Reference
+
+
+ +

Transposes a fragment of data containing packed 8-bit integer elements. +More...

+
#include <cutlass/fragment.h>
+
+

Go to the source code of this file.

+ + + + +

+Classes

struct  cutlass::gemm::IgemmSwizzle< GlobalIterator_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__swizzle_8h_source.html b/docs/generated-html/igemm__swizzle_8h_source.html new file mode 100644 index 0000000000..9399083015 --- /dev/null +++ b/docs/generated-html/igemm__swizzle_8h_source.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: igemm_swizzle.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_swizzle.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/fragment.h>
31 
32 namespace cutlass {
33 namespace gemm {
34 
36 
37 template <typename GlobalIterator_>
38 struct IgemmSwizzle {
40  typedef GlobalIterator_ GlobalIterator;
42  typedef typename GlobalIterator::Fragment Fragment;
44  typedef typename GlobalIterator::FragmentShape FragmentShape;
45 
50 
53 
55  static_assert(FragmentShape::kH % 4 == 0 && ShapeCount<FragmentShape>::kWc % 4 == 0,
56  "Not multiple of 4");
57 
59  CUTLASS_DEVICE IgemmSwizzle() {}
60 
62  CUTLASS_DEVICE void transform(Fragment const& src, Fragment& dst) {
63  // Expose src/dst as int arrays.
64  int const* src_int = reinterpret_cast<int const*>(&src[0]);
65  int* dst_int = reinterpret_cast<int*>(&dst[0]);
66 
67  // Transpose the data.
68  for (int d = 0; d < FragmentShape::kD; ++d) {
69  for (int h = 0; h < FragmentShape::kH / 4; ++h) {
70  for (int w = 0; w < ShapeCount<FragmentShape>::kWc / 4; ++w) {
71  int const i0 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
72  (4 * h + 0) * (ShapeCount<FragmentShape>::kWc / 4) + w;
73  int const i1 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
74  (4 * h + 1) * (ShapeCount<FragmentShape>::kWc / 4) + w;
75  int const i2 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
76  (4 * h + 2) * (ShapeCount<FragmentShape>::kWc / 4) + w;
77  int const i3 = d * (ShapeCount<FragmentShape>::kHwc / 4) +
78  (4 * h + 3) * (ShapeCount<FragmentShape>::kWc / 4) + w;
79 
80  int a0 = src_int[i0];
81  int a1 = src_int[i1];
82  int a2 = src_int[i2];
83  int a3 = src_int[i3];
84 
85  int b0, b1, b2, b3, c0;
86  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(b0) : "r"(a0), "r"(a1));
87  asm volatile("prmt.b32 %0, %1, %2, 0x0040;" : "=r"(c0) : "r"(a2), "r"(a3));
88  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b0) : "r"(b0), "r"(c0));
89 
90  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(b1) : "r"(a0), "r"(a1));
91  asm volatile("prmt.b32 %0, %1, %2, 0x0051;" : "=r"(c0) : "r"(a2), "r"(a3));
92  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b1) : "r"(b1), "r"(c0));
93 
94  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(b2) : "r"(a0), "r"(a1));
95  asm volatile("prmt.b32 %0, %1, %2, 0x0062;" : "=r"(c0) : "r"(a2), "r"(a3));
96  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b2) : "r"(b2), "r"(c0));
97 
98  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(b3) : "r"(a0), "r"(a1));
99  asm volatile("prmt.b32 %0, %1, %2, 0x0073;" : "=r"(c0) : "r"(a2), "r"(a3));
100  asm volatile("prmt.b32 %0, %1, %2, 0x5410;" : "=r"(b3) : "r"(b3), "r"(c0));
101 
102  dst_int[i0] = b0;
103  dst_int[i1] = b1;
104  dst_int[i2] = b2;
105  dst_int[i3] = b3;
106  }
107  }
108  }
109  }
110 };
111 
113 
114 } // namespace gemm
115 } // namespace cutlass
Definition: convert.h:33
+
std::is_same (false specialization)
Definition: platform.h:412
+
GlobalIterator::FragmentShape FragmentShape
The shape of the source fragment.
Definition: igemm_swizzle.h:44
+
Definition: igemm_swizzle.h:38
+
GlobalIterator_ GlobalIterator
The global iterator.
Definition: igemm_swizzle.h:40
+
CUTLASS_DEVICE void transform(Fragment const &src, Fragment &dst)
Transform a fragment.
Definition: igemm_swizzle.h:62
+
Fragment OutputFragment
The destination fragment.
Definition: igemm_swizzle.h:49
+
#define static_assert(__e, __m)
Definition: platform.h:145
+
Fragment InputFragment
The source fragment.
Definition: igemm_swizzle.h:47
+
GlobalIterator::Fragment Fragment
The source fragment.
Definition: igemm_swizzle.h:42
+
CUTLASS_DEVICE IgemmSwizzle()
The src/dst must be int8 fragments.
Definition: igemm_swizzle.h:59
+
Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
+
Compute derived counts of a Layout Concept based class.
Definition: shape.h:79
+
+ + + + diff --git a/docs/generated-html/igemm__traits_8h.html b/docs/generated-html/igemm__traits_8h.html new file mode 100644 index 0000000000..32d14d876c --- /dev/null +++ b/docs/generated-html/igemm__traits_8h.html @@ -0,0 +1,150 @@ + + + + + + + +Cutlass: igemm_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
igemm_traits.h File Reference
+
+
+ +

Defines structural properties of mixed-precision integer GEMM. Multiplicands are assumed to be packed 8-bit integers, accumulators are assumed to be 32-bit signed integers, and output formats vary. +More...

+ +

Go to the source code of this file.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >
 
struct  cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
 
struct  cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
 
struct  cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
 
struct  cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
 
struct  cutlass::gemm::IgemmEpilogueScalar< int >
 
struct  cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+
+ + + + diff --git a/docs/generated-html/igemm__traits_8h_source.html b/docs/generated-html/igemm__traits_8h_source.html new file mode 100644 index 0000000000..ecdd4f1df6 --- /dev/null +++ b/docs/generated-html/igemm__traits_8h_source.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: igemm_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
igemm_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
30 #pragma once
31 
32 #include <cutlass/convert.h>
33 #include <cutlass/gemm/gemm.h>
43 #include <cutlass/reshape_tile.h>
44 
45 namespace cutlass {
46 namespace gemm {
47 
49 
50 template <
52  typename OutputTile_,
54  typename ScalarD_,
56  typename AccumulatorsPerThread_>
58  : public GemmConfig<
60  int8_t,
62  int8_t,
64  ScalarD_,
66  ScalarD_,
68  OutputTile_,
70  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
72  4,
74  4,
76  16,
78  4,
80  4,
82  16,
84  1,
86  4,
88  1,
90  2> {};
91 
93 
94 template <typename OutputTile_, typename AccumulatorsPerThread_>
95 struct IgemmConfig<OutputTile_, int8_t, AccumulatorsPerThread_>
96  : public GemmConfig<
98  int8_t,
100  int8_t,
102  int8_t,
104  int8_t,
106  OutputTile_,
108  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, int8_t, int8_t, int>,
110  4,
112  4,
114  16,
116  4,
118  4,
120  16,
122  4,
124  4,
126  4,
128  2> {};
129 
131 
132 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
133 struct IgemmTileTraitsHelperA : public GemmTileTraitsHelperA<kLayout_, GemmConfig_> {};
134 
136 
137 template <typename GemmConfig_>
138 struct IgemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_>
139  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
142 
144  static int const kScalarsPerStsA = 16;
145 
149  // The layout.
151  // The pointer is int8_t const.
152  int8_t const,
153  // The tile has size KxM in GEMM's terminology.
155  // The threads are distributed as warps x 32 (the traits may reorganize).
157  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
158  4>
160 
163  // The pointer is int8_t.
164  int8_t,
165  // The tile has size KxM in GEMM's terminology.
166  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>,
167  // The threads are distributed as warps x 32 (the traits may reorganize).
168  typename GlobalTileTraits::Threads,
169  // The number of scalars per STS (STS.32 or STS.128, etc).
170  kScalarsPerStsA>
172 };
173 
175 
176 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
177 struct IgemmTileTraitsHelperB : public GemmTileTraitsHelperB<kLayout_, GemmConfig_> {};
178 
180 
181 template <typename GemmConfig_>
182 struct IgemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_>
183  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
186 
188  static int const kScalarsPerStsB = 16;
189 
193  // The layout.
195  // The pointer is int8_t const.
196  int8_t const,
197  // The tile has size KxN in GEMM's terminology.
199  // The threads are distributed as warps x 32 (the traits may reorganize).
201  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
202  4>
204 
207  // The pointer is int8_t.
208  int8_t,
209  // The tile has size KxN in GEMM's terminology.
210  Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>,
211  // The threads are distributed as warps x 32 (the traits may reorganize).
212  typename GlobalTileTraits::Threads,
213  // The number of scalars per STS (STS.32 or STS.128, etc).
214  kScalarsPerStsB>
216 };
217 
219 
220 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
222 
223 template <typename Iterator_>
224 struct IgemmTransformerA<MatrixLayout::kRowMajor, Iterator_> {
226 };
227 
228 template <typename Iterator_>
229 struct IgemmTransformerA<MatrixLayout::kColumnMajor, Iterator_> {
231 };
232 
234 
235 template <enum MatrixLayout::Kind kLayout_, typename Iterator_>
237 
238 template <typename Iterator_>
239 struct IgemmTransformerB<MatrixLayout::kColumnMajor, Iterator_> {
241 };
242 
243 template <typename Iterator_>
244 struct IgemmTransformerB<MatrixLayout::kRowMajor, Iterator_> {
246 };
247 
249 
250 template <
252  MatrixLayout::Kind kLayoutA_,
254  MatrixLayout::Kind kLayoutB_,
256  typename OutputTile_,
258  typename ScalarD_,
260  typename EpilogueFunctor_,
262  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
264  typename Index_ = int>
272 
277  typedef typename IgemmTransformerA<GemmTileTraitsHelperA::kLayout,
280  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
281  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
288 
292  // The default transformer for B.
293  typedef typename IgemmTransformerB<GemmTileTraitsHelperB::kLayout,
296  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
297  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
304 
306  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
307  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
315  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
316  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
323 
328 
331 };
332 
334 
335 template <typename ScalarD_>
337  typedef float Scalar;
338 };
339 
340 template <>
341 struct IgemmEpilogueScalar<int> {
342  typedef int Scalar;
343 };
344 
346 
347 template <
349  MatrixLayout::Kind kLayoutA_,
351  MatrixLayout::Kind kLayoutB_,
353  typename OutputTile_ = Shape<32, 128, 128>,
355  typename ScalarD_ = int,
359  typename AccumulatorsPerThread_ = Shape<32, 8, 8>,
361  typename Index_ = int,
363  typename Helper_ = IgemmTraitsHelper<kLayoutA_,
364  kLayoutB_,
365  OutputTile_,
366  ScalarD_,
367  EpilogueFunctor_,
368  AccumulatorsPerThread_,
369  Index_> >
370 struct IgemmTraits : public GemmTraits<
371  // The config.
372  typename Helper_::GemmConfig,
373  // The stream to load A from global memory to shared memory.
374  typename Helper_::GlobalLoadStreamA,
375  // The stream to load B from global memory to shared memory.
376  typename Helper_::GlobalLoadStreamB,
377  // The stream to load A from shared memory.
378  typename Helper_::SharedLoadStreamA,
379  // The stream to load B from shared memory.
380  typename Helper_::SharedLoadStreamB,
381  // The epilogue.
382  typename Helper_::Epilogue,
383  // The block swizzle to reorganize the grid.
384  IdentityBlockSwizzle,
385  // The index.
386  Index_,
387  // The tool used to clear accumulators.
388  typename Helper_::ClearAccumulators> {};
389 
391 
392 } // namespace gemm
393 } // namespace cutlass
Definition: load_store.h:42
+
TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorB
The iterator to load B from shared memory.
Definition: igemm_traits.h:319
+
Definition: convert.h:33
+
IgemmSwizzle< Iterator_ > Transformer
Definition: igemm_traits.h:230
+
Defines iterators for efficiently loading and storing to global memory.
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
The iterator to load A from global memory.
Definition: igemm_traits.h:275
+
Transposes a fragment of data containing packed 8-bit integer elements.
+
Copy< typename Iterator_::Fragment > Transformer
Definition: igemm_traits.h:240
+
Defines structural properties of complete GEMM computation.
+
GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB > GlobalLoadStreamB
The stream to load B from global memory to shared memory.
Definition: igemm_traits.h:303
+
Definition: igemm_traits.h:133
+
TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorB
The iterator to store B to shared memory.
Definition: igemm_traits.h:300
+
IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
Definition: igemm_traits.h:294
+
Definition: igemm_epilogue.h:290
+
IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for B^T.
Definition: igemm_traits.h:203
+
Definition: convert.h:69
+
GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:141
+
IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
The IGEMM config.
Definition: igemm_traits.h:267
+
Definition: gemm_shared_tile.h:38
+
Definition: tile_iterator.h:62
+
Implements matrix multiply accumulate operation of 8-bit integer data using DP4A instruction.
+
Definition: gemm_global_tile.h:159
+
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsB > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for B^N.
Definition: igemm_traits.h:215
+
Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
+
Definition: gemm_global_stream.h:161
+
Definition: gemm_traits.h:273
+
GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
The iterator to load B from global memory.
Definition: igemm_traits.h:291
+
IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
The traits class to build the iterator to load data from global memory for A^N.
Definition: igemm_traits.h:159
+
int Scalar
Definition: igemm_traits.h:342
+
IgemmSwizzle< Iterator_ > Transformer
Definition: igemm_traits.h:245
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
IgemmTileTraitsHelperB< kLayoutB_, GemmConfig > GemmTileTraitsHelperB
The GEMM config for B.
Definition: igemm_traits.h:271
+
Definition: igemm_swizzle.h:38
+
Definition: igemm_traits.h:177
+
Definition: igemm_traits.h:265
+
An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
Definition: tile_iterator.h:302
+
GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA > GlobalLoadStreamA
The stream to load A from global memory to shared memory.
Definition: igemm_traits.h:287
+
SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
The stream to load B from shared memory.
Definition: igemm_traits.h:322
+
Defines iterators for efficiently loading and storing tiles to and from shared memory.
+
Definition: matrix_traits.h:36
+
IgemmTileTraitsHelperA< kLayoutA_, GemmConfig > GemmTileTraitsHelperA
The GEMM config for A.
Definition: igemm_traits.h:269
+
Definition: gemm_shared_stream.h:44
+
Defines a type for restructuring a tile.
+
TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorA
The iterator to load A from shared memory.
Definition: igemm_traits.h:310
+
ClearAccumulators< typename MultiplyAdd::ScalarC > ClearAccumulators
The object to clear accumulators.
Definition: igemm_traits.h:327
+
Definition: gemm_traits.h:79
+
Definition: gemm_traits.h:137
+
Definition: matrix_traits.h:43
+
Definition: igemm_traits.h:57
+
Definition: igemm_traits.h:221
+
Definition: igemm_global_tile.h:50
+
float Scalar
Definition: igemm_traits.h:337
+
Definition: gemm_traits.h:428
+
Copy< typename Iterator_::Fragment > Transformer
Definition: igemm_traits.h:225
+
Definition: igemm_traits.h:370
+
A Shape implementing Layout Concept describing the dimensions of a cube.
Definition: shape.h:64
+
GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
The traits class to build the iterator to store data to shared memory for A^N.
Definition: igemm_traits.h:171
+ +
Template performing matrix multiply-add operation within a thread.
Definition: thread_multiply_add.h:43
+
Definition: matrix_traits.h:36
+ +
IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
The epilogue.
Definition: igemm_traits.h:330
+
IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
The default transformer for A.
Definition: igemm_traits.h:278
+
Kind
Definition: matrix_traits.h:36
+
Definition: igemm_traits.h:236
+
TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorA
The iterator to store A to shared memory.
Definition: igemm_traits.h:284
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Definition: matrix_traits.h:43
+
Implements a software-pipelined efficient GEMM.
+
ReshapeThreads< Tile, Threads_ >::Threads Threads
The threads shape.
Definition: gemm_global_tile.h:87
+
Defines structural properties of the GEMM epilogue.
+
Definition: igemm_traits.h:336
+
Defines the epilogue phase of the GEMM computation for IGEMM, supporting integer and floating-point o...
+
Defines conversion operations among Fragments of different base type.
+
GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
The base config.
Definition: igemm_traits.h:185
+
SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
The stream to load A from shared memory.
Definition: igemm_traits.h:313
+
Implements tile iterators to partition the thread block tile into 2D subtiles and efficiently load ea...
+
An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
Definition: tile_iterator.h:620
+
GemmConfig::MultiplyAdd MultiplyAdd
The multiply-add functor.
Definition: igemm_traits.h:325
+
+ + + + diff --git a/docs/generated-html/index.html b/docs/generated-html/index.html new file mode 100644 index 0000000000..f2ba68993a --- /dev/null +++ b/docs/generated-html/index.html @@ -0,0 +1,83 @@ + + + + + + + +Cutlass: Main Page + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + +
+ +
+
+ + +
+ +
+ +
+
+
Cutlass Documentation
+
+
+
+ + + + diff --git a/docs/generated-html/iterator__access_8h.html b/docs/generated-html/iterator__access_8h.html new file mode 100644 index 0000000000..cc41cd5af7 --- /dev/null +++ b/docs/generated-html/iterator__access_8h.html @@ -0,0 +1,175 @@ + + + + + + + +Cutlass: iterator_access.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
iterator_access.h File Reference
+
+
+ +

Free functions for loading and storing to implementations of tile iterator concepts. +More...

+ +

Go to the source code of this file.

+ + + + +

+Namespaces

 cutlass
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator &iterator, Fragment &fragment)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment)
 Loads a fragment from a shared memory input iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
 Loads a fragment from a shared memory input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Loads a fragment from an input iterator, masked by a predicate iterator. More...
 
template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 
template<typename InputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
 Loads a fragment from an input iterator. More...
 
template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
 Loads a fragment from an input iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator &iterator, Fragment &fragment)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
 Stores a fragment to a shared memory output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Stores a fragment to an output iterator, masked by a predicate iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
 Stores a fragment to an output iterator, masked by a predicate iterator. More...
 
template<typename OutputIterator , typename Fragment >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
 Stores a fragment to an output iterator. More...
 
template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
 Stores a fragment to an output iterator. More...
 
+
+ + + + diff --git a/docs/generated-html/iterator__access_8h_source.html b/docs/generated-html/iterator__access_8h_source.html new file mode 100644 index 0000000000..11289a933b --- /dev/null +++ b/docs/generated-html/iterator__access_8h_source.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: iterator_access.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
iterator_access.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
31 #include <cutlass/load_store.h>
33 #include <cutlass/shape.h>
34 
35 namespace cutlass {
36 
38 
40 template <typename InputIterator, typename Fragment>
41 CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) {
42  typename InputIterator::FragmentIterator frag_iterator(fragment);
43  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
44  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
45  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
46  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
47  if (iterator.valid(d, h, w, c)) {
48  int const offset =
50  0, 0, w, c);
52  load(reinterpret_cast<typename InputIterator::AccessType &>(
53  frag_iterator.at(d, h, w, c)),
54  iterator.data(),
55  offset);
56  }
57  }
58  if (w < InputIterator::Iterations::kW - 1) {
59  iterator.inc_w();
60  }
61  }
62  if (h < InputIterator::Iterations::kH - 1) {
63  iterator.inc_h();
64  }
65  }
66  if (d < InputIterator::Iterations::kD - 1) {
67  iterator.inc_d();
68  }
69  }
70  iterator.inc_advance();
71 }
72 
74 template <typename InputIterator, typename Fragment>
75 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment) {
76  typename InputIterator::FragmentIterator frag_iterator(fragment);
77  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
78  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
79  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
80  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
81  int const offset =
83  d, h, w, c);
84 
85  FragmentLoad<InputIterator::kIteratorFragment,
86  InputIterator::Tile::kC,
87  typename InputIterator::Scalar,
88  InputIterator::kMemorySpace,
89  typename InputIterator::FragmentElement,
90  InputIterator::Tile::kW>::load(frag_iterator.at(d, h, w, c),
91  iterator.data(),
92  offset);
93  }
94  }
95  }
96  }
97 }
98 
100 template <typename InputIterator, typename Fragment>
101 CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d) {
102  typename InputIterator::FragmentIterator frag_iterator(fragment);
103  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
104  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
105  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
106  int const offset =
108  d, h, w, c);
109 
110  FragmentLoad<InputIterator::kIteratorFragment,
111  InputIterator::Tile::kC,
112  typename InputIterator::Scalar,
113  InputIterator::kMemorySpace,
114  typename InputIterator::FragmentElement,
115  InputIterator::Tile::kW>::load(frag_iterator.at(0, h, w, c),
116  iterator.data(),
117  offset);
118  }
119  }
120  }
121 }
122 
124 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
126  Fragment &fragment,
127  typename InputIterator::Index offset,
128  ConstPredicateAdapter predicate_adapter) {
129  for (int d = 0; d < InputIterator::Iterations::kD; ++d, iterator.inc_d()) {
130  for (int h = 0; h < InputIterator::Iterations::kH; ++h, iterator.inc_h()) {
131  for (int w = 0; w < InputIterator::Iterations::kW; ++w, iterator.inc_w()) {
132  if (predicate_adapter.at(d, h, w, 0)) {
133  int idx = InputIterator::Tile::kC *
134  (w + InputIterator::Iterations::kW * (h + InputIterator::Iterations::kH * d));
135 
137  load(reinterpret_cast<typename InputIterator::AccessType &>(fragment[idx]),
138  iterator.data(),
139  offset);
140  }
141  }
142  }
143  }
144 }
145 
147 template <typename InputIterator, typename Fragment>
149  Fragment &fragment,
150  typename InputIterator::Index offset = 0) {
152  iterator_load_post_increment(iterator, fragment, offset, pred);
153 }
154 
156 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
158  Fragment &fragment,
159  ConstPredicateAdapter pred_it) {
160  iterator_load_post_increment(iterator, fragment, 0, pred_it);
161 }
162 
163 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
164 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &_iterator,
165  Fragment &fragment,
166  typename InputIterator::Index offset,
167  ConstPredicateAdapter predicate_adapter) {
168  InputIterator iterator(_iterator);
169  iterator_load_post_increment(iterator, fragment, offset, predicate_adapter);
170 }
171 
173 template <typename InputIterator, typename Fragment>
174 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
175  Fragment &fragment,
176  typename InputIterator::Index offset = 0) {
178  iterator_load(iterator, fragment, offset, pred);
179 }
180 
182 template <typename InputIterator, typename Fragment, typename ConstPredicateAdapter>
183 CUTLASS_HOST_DEVICE void iterator_load(InputIterator const &iterator,
184  Fragment &fragment,
185  ConstPredicateAdapter pred_it) {
186  iterator_load(iterator, fragment, 0, pred_it);
187 }
188 
190 
192 template <typename OutputIterator, typename Fragment>
193 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) {
194  typename OutputIterator::FragmentIterator frag_iterator(fragment);
195  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
196  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
197  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
198  if (iterator.valid(d, h, w, 0)) {
199  int const offset =
201  d, h, w, 0);
202 
203  Store<typename Fragment::Element,
204  OutputIterator::Tile::kC,
205  OutputIterator::kMemorySpace>::
206  store(reinterpret_cast<typename OutputIterator::AccessType &>(
207  frag_iterator.at(d, h, w, 0)),
208  iterator.data(),
209  offset);
210  }
211  if (w < OutputIterator::Iterations::kW - 1) {
212  iterator.inc_w();
213  }
214  }
215  if (h < OutputIterator::Iterations::kH - 1) {
216  iterator.inc_h();
217  }
218  }
219  if (d < OutputIterator::Iterations::kD - 1) {
220  iterator.inc_d();
221  }
222  }
223  iterator.inc_advance();
224 }
225 
227 template <typename OutputIterator, typename Fragment>
228 CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment) {
229  typename OutputIterator::FragmentConstIterator frag_iterator(fragment);
230  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
231  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
232  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
233  for (int c = 0; c < OutputIterator::Iterations::kC; ++c) {
234  int const offset =
236  d, h, w, c);
237 
238  FragmentStore<OutputIterator::kIteratorFragment,
239  OutputIterator::Tile::kC,
240  typename OutputIterator::Scalar,
241  OutputIterator::kMemorySpace,
242  typename OutputIterator::FragmentElement,
243  OutputIterator::Tile::kW>::store(frag_iterator.at(d, h, w, c),
244  iterator.data(),
245  offset);
246  }
247  }
248  }
249  }
250 }
251 
253 
255 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
257  Fragment const &fragment,
258  typename OutputIterator::Index offset,
259  ConstPredicateAdapter predicate_adapter) {
260  for (int d = 0; d < OutputIterator::Iterations::kD; ++d, iterator.inc_d()) {
261  for (int h = 0; h < OutputIterator::Iterations::kH; ++h, iterator.inc_h()) {
262  for (int w = 0; w < OutputIterator::Iterations::kW; ++w, iterator.inc_w()) {
263  if (predicate_adapter.at(d, h, w, 0)) {
264  int idx = OutputIterator::Tile::kC *
265  (w + OutputIterator::Iterations::kW * (h + OutputIterator::Iterations::kH * d));
266 
267  Store<typename Fragment::Element,
268  OutputIterator::Tile::kC,
269  OutputIterator::kMemorySpace>::
270  store(reinterpret_cast<typename OutputIterator::AccessType const &>(fragment[idx]),
271  iterator.data(),
272  offset);
273  }
274  }
275  }
276  }
277 }
278 
280 template <typename OutputIterator, typename Fragment>
282  Fragment const &fragment,
283  typename OutputIterator::Index offset = 0) {
285  iterator_store_post_increment(iterator, fragment, offset, pred);
286 }
287 
289 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
291  Fragment const &fragment,
292  ConstPredicateAdapter pred_it) {
293  iterator_store_post_increment(iterator, fragment, 0, pred_it);
294 }
295 
297 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
298 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &_iterator,
299  Fragment const &fragment,
300  typename OutputIterator::Index offset,
301  ConstPredicateAdapter predicate_adapter) {
302  OutputIterator iterator(_iterator);
303  iterator_store_post_increment(iterator, fragment, offset, predicate_adapter);
304 }
305 
307 template <typename OutputIterator, typename Fragment>
308 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
309  Fragment const &fragment,
310  typename OutputIterator::Index offset = 0) {
312  iterator_store(iterator, fragment, offset, pred);
313 }
314 
316 template <typename OutputIterator, typename Fragment, typename ConstPredicateAdapter>
317 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator const &iterator,
318  Fragment const &fragment,
319  ConstPredicateAdapter pred_it) {
320  iterator_store(iterator, fragment, 0, pred_it);
321 }
322 
324 
325 } // namespace cutlass
Definition: fragment_load_store.h:43
+
Definition: convert.h:33
+
CUTLASS_DEVICE void shared_iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from a shared memory input iterator.
Definition: iterator_access.h:75
+
CUTLASS_HOST_DEVICE void iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Stores a fragment to an output iterator, masked by a predicate iterator.
Definition: iterator_access.h:256
+
Defines accessors for loading and storing fragments to memory efficiently.
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
A template defining Fragment Concept.
Definition: fragment.h:99
+
Definition: load_store.h:131
+
Defines container classes and iterators for managing a statically sized vector of boolean predicates...
+
static CUTLASS_DEVICE int get(int d, int h, int w, int c)
Definition: shape.h:211
+
CUTLASS_HOST_DEVICE void iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
Loads a fragment from an input iterator, masked by a predicate iterator.
Definition: iterator_access.h:125
+
Defines abstractions for efficiently loading and storing vectors to memory.
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void shared_iterator_store(OutputIterator &iterator, Fragment const &fragment)
Stores a fragment to a shared memory output iterator.
Definition: iterator_access.h:228
+
Element_ Element
The element.
Definition: fragment.h:108
+
Always returns true predicate.
Definition: predicate_vector.h:426
+
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Stores a fragment to an output iterator.
Definition: iterator_access.h:193
+
Definition: fragment_load_store.h:91
+
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Loads a fragment from an input iterator.
Definition: iterator_access.h:41
+
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
+
+ + + + diff --git a/docs/generated-html/jquery.js b/docs/generated-html/jquery.js new file mode 100644 index 0000000000..2771c749a4 --- /dev/null +++ b/docs/generated-html/jquery.js @@ -0,0 +1,115 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +/*! + * jQuery JavaScript Library v1.7.1 + * http://jquery.com/ + * + * Copyright 2011, John Resig + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * Copyright 2011, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. 
+ * + * Date: Mon Nov 21 21:11:03 2011 -0500 + */ +(function(bb,L){var av=bb.document,bu=bb.navigator,bl=bb.location;var b=(function(){var bF=function(b0,b1){return new bF.fn.init(b0,b1,bD)},bU=bb.jQuery,bH=bb.$,bD,bY=/^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,bM=/\S/,bI=/^\s+/,bE=/\s+$/,bA=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,bN=/^[\],:{}\s]*$/,bW=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,bP=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,bJ=/(?:^|:|,)(?:\s*\[)+/g,by=/(webkit)[ \/]([\w.]+)/,bR=/(opera)(?:.*version)?[ \/]([\w.]+)/,bQ=/(msie) ([\w.]+)/,bS=/(mozilla)(?:.*? rv:([\w.]+))?/,bB=/-([a-z]|[0-9])/ig,bZ=/^-ms-/,bT=function(b0,b1){return(b1+"").toUpperCase()},bX=bu.userAgent,bV,bC,e,bL=Object.prototype.toString,bG=Object.prototype.hasOwnProperty,bz=Array.prototype.push,bK=Array.prototype.slice,bO=String.prototype.trim,bv=Array.prototype.indexOf,bx={};bF.fn=bF.prototype={constructor:bF,init:function(b0,b4,b3){var b2,b5,b1,b6;if(!b0){return this}if(b0.nodeType){this.context=this[0]=b0;this.length=1;return this}if(b0==="body"&&!b4&&av.body){this.context=av;this[0]=av.body;this.selector=b0;this.length=1;return this}if(typeof b0==="string"){if(b0.charAt(0)==="<"&&b0.charAt(b0.length-1)===">"&&b0.length>=3){b2=[null,b0,null]}else{b2=bY.exec(b0)}if(b2&&(b2[1]||!b4)){if(b2[1]){b4=b4 instanceof bF?b4[0]:b4;b6=(b4?b4.ownerDocument||b4:av);b1=bA.exec(b0);if(b1){if(bF.isPlainObject(b4)){b0=[av.createElement(b1[1])];bF.fn.attr.call(b0,b4,true)}else{b0=[b6.createElement(b1[1])]}}else{b1=bF.buildFragment([b2[1]],[b6]);b0=(b1.cacheable?bF.clone(b1.fragment):b1.fragment).childNodes}return bF.merge(this,b0)}else{b5=av.getElementById(b2[2]);if(b5&&b5.parentNode){if(b5.id!==b2[2]){return b3.find(b0)}this.length=1;this[0]=b5}this.context=av;this.selector=b0;return this}}else{if(!b4||b4.jquery){return(b4||b3).find(b0)}else{return this.constructor(b4).find(b0)}}}else{if(bF.isFunction(b0)){return 
b3.ready(b0)}}if(b0.selector!==L){this.selector=b0.selector;this.context=b0.context}return bF.makeArray(b0,this)},selector:"",jquery:"1.7.1",length:0,size:function(){return this.length},toArray:function(){return bK.call(this,0)},get:function(b0){return b0==null?this.toArray():(b0<0?this[this.length+b0]:this[b0])},pushStack:function(b1,b3,b0){var b2=this.constructor();if(bF.isArray(b1)){bz.apply(b2,b1)}else{bF.merge(b2,b1)}b2.prevObject=this;b2.context=this.context;if(b3==="find"){b2.selector=this.selector+(this.selector?" ":"")+b0}else{if(b3){b2.selector=this.selector+"."+b3+"("+b0+")"}}return b2},each:function(b1,b0){return bF.each(this,b1,b0)},ready:function(b0){bF.bindReady();bC.add(b0);return this},eq:function(b0){b0=+b0;return b0===-1?this.slice(b0):this.slice(b0,b0+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(bK.apply(this,arguments),"slice",bK.call(arguments).join(","))},map:function(b0){return this.pushStack(bF.map(this,function(b2,b1){return b0.call(b2,b1,b2)}))},end:function(){return this.prevObject||this.constructor(null)},push:bz,sort:[].sort,splice:[].splice};bF.fn.init.prototype=bF.fn;bF.extend=bF.fn.extend=function(){var b9,b2,b0,b1,b6,b7,b5=arguments[0]||{},b4=1,b3=arguments.length,b8=false;if(typeof b5==="boolean"){b8=b5;b5=arguments[1]||{};b4=2}if(typeof b5!=="object"&&!bF.isFunction(b5)){b5={}}if(b3===b4){b5=this;--b4}for(;b40){return}bC.fireWith(av,[bF]);if(bF.fn.trigger){bF(av).trigger("ready").off("ready")}}},bindReady:function(){if(bC){return}bC=bF.Callbacks("once memory");if(av.readyState==="complete"){return setTimeout(bF.ready,1)}if(av.addEventListener){av.addEventListener("DOMContentLoaded",e,false);bb.addEventListener("load",bF.ready,false)}else{if(av.attachEvent){av.attachEvent("onreadystatechange",e);bb.attachEvent("onload",bF.ready);var b0=false;try{b0=bb.frameElement==null}catch(b1){}if(av.documentElement.doScroll&&b0){bw()}}}},isFunction:function(b0){return 
bF.type(b0)==="function"},isArray:Array.isArray||function(b0){return bF.type(b0)==="array"},isWindow:function(b0){return b0&&typeof b0==="object"&&"setInterval" in b0},isNumeric:function(b0){return !isNaN(parseFloat(b0))&&isFinite(b0)},type:function(b0){return b0==null?String(b0):bx[bL.call(b0)]||"object"},isPlainObject:function(b2){if(!b2||bF.type(b2)!=="object"||b2.nodeType||bF.isWindow(b2)){return false}try{if(b2.constructor&&!bG.call(b2,"constructor")&&!bG.call(b2.constructor.prototype,"isPrototypeOf")){return false}}catch(b1){return false}var b0;for(b0 in b2){}return b0===L||bG.call(b2,b0)},isEmptyObject:function(b1){for(var b0 in b1){return false}return true},error:function(b0){throw new Error(b0)},parseJSON:function(b0){if(typeof b0!=="string"||!b0){return null}b0=bF.trim(b0);if(bb.JSON&&bb.JSON.parse){return bb.JSON.parse(b0)}if(bN.test(b0.replace(bW,"@").replace(bP,"]").replace(bJ,""))){return(new Function("return "+b0))()}bF.error("Invalid JSON: "+b0)},parseXML:function(b2){var b0,b1;try{if(bb.DOMParser){b1=new DOMParser();b0=b1.parseFromString(b2,"text/xml")}else{b0=new ActiveXObject("Microsoft.XMLDOM");b0.async="false";b0.loadXML(b2)}}catch(b3){b0=L}if(!b0||!b0.documentElement||b0.getElementsByTagName("parsererror").length){bF.error("Invalid XML: "+b2)}return b0},noop:function(){},globalEval:function(b0){if(b0&&bM.test(b0)){(bb.execScript||function(b1){bb["eval"].call(bb,b1)})(b0)}},camelCase:function(b0){return b0.replace(bZ,"ms-").replace(bB,bT)},nodeName:function(b1,b0){return b1.nodeName&&b1.nodeName.toUpperCase()===b0.toUpperCase()},each:function(b3,b6,b2){var b1,b4=0,b5=b3.length,b0=b5===L||bF.isFunction(b3);if(b2){if(b0){for(b1 in b3){if(b6.apply(b3[b1],b2)===false){break}}}else{for(;b40&&b0[0]&&b0[b1-1])||b1===0||bF.isArray(b0));if(b3){for(;b21?aJ.call(arguments,0):bG;if(!(--bw)){bC.resolveWith(bC,bx)}}}function bz(bF){return function(bG){bB[bF]=arguments.length>1?aJ.call(arguments,0):bG;bC.notifyWith(bE,bB)}}if(e>1){for(;bv
a";bI=bv.getElementsByTagName("*");bF=bv.getElementsByTagName("a")[0];if(!bI||!bI.length||!bF){return{}}bG=av.createElement("select");bx=bG.appendChild(av.createElement("option"));bE=bv.getElementsByTagName("input")[0];bJ={leadingWhitespace:(bv.firstChild.nodeType===3),tbody:!bv.getElementsByTagName("tbody").length,htmlSerialize:!!bv.getElementsByTagName("link").length,style:/top/.test(bF.getAttribute("style")),hrefNormalized:(bF.getAttribute("href")==="/a"),opacity:/^0.55/.test(bF.style.opacity),cssFloat:!!bF.style.cssFloat,checkOn:(bE.value==="on"),optSelected:bx.selected,getSetAttribute:bv.className!=="t",enctype:!!av.createElement("form").enctype,html5Clone:av.createElement("nav").cloneNode(true).outerHTML!=="<:nav>",submitBubbles:true,changeBubbles:true,focusinBubbles:false,deleteExpando:true,noCloneEvent:true,inlineBlockNeedsLayout:false,shrinkWrapBlocks:false,reliableMarginRight:true};bE.checked=true;bJ.noCloneChecked=bE.cloneNode(true).checked;bG.disabled=true;bJ.optDisabled=!bx.disabled;try{delete bv.test}catch(bC){bJ.deleteExpando=false}if(!bv.addEventListener&&bv.attachEvent&&bv.fireEvent){bv.attachEvent("onclick",function(){bJ.noCloneEvent=false});bv.cloneNode(true).fireEvent("onclick")}bE=av.createElement("input");bE.value="t";bE.setAttribute("type","radio");bJ.radioValue=bE.value==="t";bE.setAttribute("checked","checked");bv.appendChild(bE);bD=av.createDocumentFragment();bD.appendChild(bv.lastChild);bJ.checkClone=bD.cloneNode(true).cloneNode(true).lastChild.checked;bJ.appendChecked=bE.checked;bD.removeChild(bE);bD.appendChild(bv);bv.innerHTML="";if(bb.getComputedStyle){bA=av.createElement("div");bA.style.width="0";bA.style.marginRight="0";bv.style.width="2px";bv.appendChild(bA);bJ.reliableMarginRight=(parseInt((bb.getComputedStyle(bA,null)||{marginRight:0}).marginRight,10)||0)===0}if(bv.attachEvent){for(by in {submit:1,change:1,focusin:1}){bB="on"+by;bw=(bB in bv);if(!bw){bv.setAttribute(bB,"return;");bw=(typeof 
bv[bB]==="function")}bJ[by+"Bubbles"]=bw}}bD.removeChild(bv);bD=bG=bx=bA=bv=bE=null;b(function(){var bM,bU,bV,bT,bN,bO,bL,bS,bR,e,bP,bQ=av.getElementsByTagName("body")[0];if(!bQ){return}bL=1;bS="position:absolute;top:0;left:0;width:1px;height:1px;margin:0;";bR="visibility:hidden;border:0;";e="style='"+bS+"border:5px solid #000;padding:0;'";bP="
";bM=av.createElement("div");bM.style.cssText=bR+"width:0;height:0;position:static;top:0;margin-top:"+bL+"px";bQ.insertBefore(bM,bQ.firstChild);bv=av.createElement("div");bM.appendChild(bv);bv.innerHTML="
t
";bz=bv.getElementsByTagName("td");bw=(bz[0].offsetHeight===0);bz[0].style.display="";bz[1].style.display="none";bJ.reliableHiddenOffsets=bw&&(bz[0].offsetHeight===0);bv.innerHTML="";bv.style.width=bv.style.paddingLeft="1px";b.boxModel=bJ.boxModel=bv.offsetWidth===2;if(typeof bv.style.zoom!=="undefined"){bv.style.display="inline";bv.style.zoom=1;bJ.inlineBlockNeedsLayout=(bv.offsetWidth===2);bv.style.display="";bv.innerHTML="
";bJ.shrinkWrapBlocks=(bv.offsetWidth!==2)}bv.style.cssText=bS+bR;bv.innerHTML=bP;bU=bv.firstChild;bV=bU.firstChild;bN=bU.nextSibling.firstChild.firstChild;bO={doesNotAddBorder:(bV.offsetTop!==5),doesAddBorderForTableAndCells:(bN.offsetTop===5)};bV.style.position="fixed";bV.style.top="20px";bO.fixedPosition=(bV.offsetTop===20||bV.offsetTop===15);bV.style.position=bV.style.top="";bU.style.overflow="hidden";bU.style.position="relative";bO.subtractsBorderForOverflowNotVisible=(bV.offsetTop===-5);bO.doesNotIncludeMarginInBodyOffset=(bQ.offsetTop!==bL);bQ.removeChild(bM);bv=bM=null;b.extend(bJ,bO)});return bJ})();var aS=/^(?:\{.*\}|\[.*\])$/,aA=/([A-Z])/g;b.extend({cache:{},uuid:0,expando:"jQuery"+(b.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:true,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:true},hasData:function(e){e=e.nodeType?b.cache[e[b.expando]]:e[b.expando];return !!e&&!S(e)},data:function(bx,bv,bz,by){if(!b.acceptData(bx)){return}var bG,bA,bD,bE=b.expando,bC=typeof bv==="string",bF=bx.nodeType,e=bF?b.cache:bx,bw=bF?bx[bE]:bx[bE]&&bE,bB=bv==="events";if((!bw||!e[bw]||(!bB&&!by&&!e[bw].data))&&bC&&bz===L){return}if(!bw){if(bF){bx[bE]=bw=++b.uuid}else{bw=bE}}if(!e[bw]){e[bw]={};if(!bF){e[bw].toJSON=b.noop}}if(typeof bv==="object"||typeof bv==="function"){if(by){e[bw]=b.extend(e[bw],bv)}else{e[bw].data=b.extend(e[bw].data,bv)}}bG=bA=e[bw];if(!by){if(!bA.data){bA.data={}}bA=bA.data}if(bz!==L){bA[b.camelCase(bv)]=bz}if(bB&&!bA[bv]){return bG.events}if(bC){bD=bA[bv];if(bD==null){bD=bA[b.camelCase(bv)]}}else{bD=bA}return bD},removeData:function(bx,bv,by){if(!b.acceptData(bx)){return}var bB,bA,bz,bC=b.expando,bD=bx.nodeType,e=bD?b.cache:bx,bw=bD?bx[bC]:bC;if(!e[bw]){return}if(bv){bB=by?e[bw]:e[bw].data;if(bB){if(!b.isArray(bv)){if(bv in bB){bv=[bv]}else{bv=b.camelCase(bv);if(bv in bB){bv=[bv]}else{bv=bv.split(" ")}}}for(bA=0,bz=bv.length;bA-1){return true}}return false},val:function(bx){var 
e,bv,by,bw=this[0];if(!arguments.length){if(bw){e=b.valHooks[bw.nodeName.toLowerCase()]||b.valHooks[bw.type];if(e&&"get" in e&&(bv=e.get(bw,"value"))!==L){return bv}bv=bw.value;return typeof bv==="string"?bv.replace(aU,""):bv==null?"":bv}return}by=b.isFunction(bx);return this.each(function(bA){var bz=b(this),bB;if(this.nodeType!==1){return}if(by){bB=bx.call(this,bA,bz.val())}else{bB=bx}if(bB==null){bB=""}else{if(typeof bB==="number"){bB+=""}else{if(b.isArray(bB)){bB=b.map(bB,function(bC){return bC==null?"":bC+""})}}}e=b.valHooks[this.nodeName.toLowerCase()]||b.valHooks[this.type];if(!e||!("set" in e)||e.set(this,bB,"value")===L){this.value=bB}})}});b.extend({valHooks:{option:{get:function(e){var bv=e.attributes.value;return !bv||bv.specified?e.value:e.text}},select:{get:function(e){var bA,bv,bz,bx,by=e.selectedIndex,bB=[],bC=e.options,bw=e.type==="select-one";if(by<0){return null}bv=bw?by:0;bz=bw?by+1:bC.length;for(;bv=0});if(!e.length){bv.selectedIndex=-1}return e}}},attrFn:{val:true,css:true,html:true,text:true,data:true,width:true,height:true,offset:true},attr:function(bA,bx,bB,bz){var bw,e,by,bv=bA.nodeType;if(!bA||bv===3||bv===8||bv===2){return}if(bz&&bx in b.attrFn){return b(bA)[bx](bB)}if(typeof bA.getAttribute==="undefined"){return b.prop(bA,bx,bB)}by=bv!==1||!b.isXMLDoc(bA);if(by){bx=bx.toLowerCase();e=b.attrHooks[bx]||(ao.test(bx)?aY:be)}if(bB!==L){if(bB===null){b.removeAttr(bA,bx);return}else{if(e&&"set" in e&&by&&(bw=e.set(bA,bB,bx))!==L){return bw}else{bA.setAttribute(bx,""+bB);return bB}}}else{if(e&&"get" in e&&by&&(bw=e.get(bA,bx))!==null){return bw}else{bw=bA.getAttribute(bx);return bw===null?L:bw}}},removeAttr:function(bx,bz){var by,bA,bv,e,bw=0;if(bz&&bx.nodeType===1){bA=bz.toLowerCase().split(af);e=bA.length;for(;bw=0)}}})});var 
bd=/^(?:textarea|input|select)$/i,n=/^([^\.]*)?(?:\.(.+))?$/,J=/\bhover(\.\S+)?\b/,aO=/^key/,bf=/^(?:mouse|contextmenu)|click/,T=/^(?:focusinfocus|focusoutblur)$/,U=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,Y=function(e){var bv=U.exec(e);if(bv){bv[1]=(bv[1]||"").toLowerCase();bv[3]=bv[3]&&new RegExp("(?:^|\\s)"+bv[3]+"(?:\\s|$)")}return bv},j=function(bw,e){var bv=bw.attributes||{};return((!e[1]||bw.nodeName.toLowerCase()===e[1])&&(!e[2]||(bv.id||{}).value===e[2])&&(!e[3]||e[3].test((bv["class"]||{}).value)))},bt=function(e){return b.event.special.hover?e:e.replace(J,"mouseenter$1 mouseleave$1")};b.event={add:function(bx,bC,bJ,bA,by){var bD,bB,bK,bI,bH,bF,e,bG,bv,bz,bw,bE;if(bx.nodeType===3||bx.nodeType===8||!bC||!bJ||!(bD=b._data(bx))){return}if(bJ.handler){bv=bJ;bJ=bv.handler}if(!bJ.guid){bJ.guid=b.guid++}bK=bD.events;if(!bK){bD.events=bK={}}bB=bD.handle;if(!bB){bD.handle=bB=function(bL){return typeof b!=="undefined"&&(!bL||b.event.triggered!==bL.type)?b.event.dispatch.apply(bB.elem,arguments):L};bB.elem=bx}bC=b.trim(bt(bC)).split(" ");for(bI=0;bI=0){bG=bG.slice(0,-1);bw=true}if(bG.indexOf(".")>=0){bx=bG.split(".");bG=bx.shift();bx.sort()}if((!bA||b.event.customEvent[bG])&&!b.event.global[bG]){return}bv=typeof bv==="object"?bv[b.expando]?bv:new b.Event(bG,bv):new b.Event(bG);bv.type=bG;bv.isTrigger=true;bv.exclusive=bw;bv.namespace=bx.join(".");bv.namespace_re=bv.namespace?new RegExp("(^|\\.)"+bx.join("\\.(?:.*\\.)?")+"(\\.|$)"):null;by=bG.indexOf(":")<0?"on"+bG:"";if(!bA){e=b.cache;for(bC in 
e){if(e[bC].events&&e[bC].events[bG]){b.event.trigger(bv,bD,e[bC].handle.elem,true)}}return}bv.result=L;if(!bv.target){bv.target=bA}bD=bD!=null?b.makeArray(bD):[];bD.unshift(bv);bF=b.event.special[bG]||{};if(bF.trigger&&bF.trigger.apply(bA,bD)===false){return}bB=[[bA,bF.bindType||bG]];if(!bJ&&!bF.noBubble&&!b.isWindow(bA)){bI=bF.delegateType||bG;bH=T.test(bI+bG)?bA:bA.parentNode;bz=null;for(;bH;bH=bH.parentNode){bB.push([bH,bI]);bz=bH}if(bz&&bz===bA.ownerDocument){bB.push([bz.defaultView||bz.parentWindow||bb,bI])}}for(bC=0;bCbA){bH.push({elem:this,matches:bz.slice(bA)})}for(bC=0;bC0?this.on(e,null,bx,bw):this.trigger(e)};if(b.attrFn){b.attrFn[e]=true}if(aO.test(e)){b.event.fixHooks[e]=b.event.keyHooks}if(bf.test(e)){b.event.fixHooks[e]=b.event.mouseHooks}}); +/*! + * Sizzle CSS Selector Engine + * Copyright 2011, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. + * More information: http://sizzlejs.com/ + */ +(function(){var bH=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,bC="sizcache"+(Math.random()+"").replace(".",""),bI=0,bL=Object.prototype.toString,bB=false,bA=true,bK=/\\/g,bO=/\r\n/g,bQ=/\W/;[0,0].sort(function(){bA=false;return 0});var by=function(bV,e,bY,bZ){bY=bY||[];e=e||av;var b1=e;if(e.nodeType!==1&&e.nodeType!==9){return[]}if(!bV||typeof bV!=="string"){return bY}var 
bS,b3,b6,bR,b2,b5,b4,bX,bU=true,bT=by.isXML(e),bW=[],b0=bV;do{bH.exec("");bS=bH.exec(b0);if(bS){b0=bS[3];bW.push(bS[1]);if(bS[2]){bR=bS[3];break}}}while(bS);if(bW.length>1&&bD.exec(bV)){if(bW.length===2&&bE.relative[bW[0]]){b3=bM(bW[0]+bW[1],e,bZ)}else{b3=bE.relative[bW[0]]?[e]:by(bW.shift(),e);while(bW.length){bV=bW.shift();if(bE.relative[bV]){bV+=bW.shift()}b3=bM(bV,b3,bZ)}}}else{if(!bZ&&bW.length>1&&e.nodeType===9&&!bT&&bE.match.ID.test(bW[0])&&!bE.match.ID.test(bW[bW.length-1])){b2=by.find(bW.shift(),e,bT);e=b2.expr?by.filter(b2.expr,b2.set)[0]:b2.set[0]}if(e){b2=bZ?{expr:bW.pop(),set:bF(bZ)}:by.find(bW.pop(),bW.length===1&&(bW[0]==="~"||bW[0]==="+")&&e.parentNode?e.parentNode:e,bT);b3=b2.expr?by.filter(b2.expr,b2.set):b2.set;if(bW.length>0){b6=bF(b3)}else{bU=false}while(bW.length){b5=bW.pop();b4=b5;if(!bE.relative[b5]){b5=""}else{b4=bW.pop()}if(b4==null){b4=e}bE.relative[b5](b6,b4,bT)}}else{b6=bW=[]}}if(!b6){b6=b3}if(!b6){by.error(b5||bV)}if(bL.call(b6)==="[object Array]"){if(!bU){bY.push.apply(bY,b6)}else{if(e&&e.nodeType===1){for(bX=0;b6[bX]!=null;bX++){if(b6[bX]&&(b6[bX]===true||b6[bX].nodeType===1&&by.contains(e,b6[bX]))){bY.push(b3[bX])}}}else{for(bX=0;b6[bX]!=null;bX++){if(b6[bX]&&b6[bX].nodeType===1){bY.push(b3[bX])}}}}}else{bF(b6,bY)}if(bR){by(bR,b1,bY,bZ);by.uniqueSort(bY)}return bY};by.uniqueSort=function(bR){if(bJ){bB=bA;bR.sort(bJ);if(bB){for(var e=1;e0};by.find=function(bX,e,bY){var bW,bS,bU,bT,bV,bR;if(!bX){return[]}for(bS=0,bU=bE.order.length;bS":function(bW,bR){var bV,bU=typeof bR==="string",bS=0,e=bW.length;if(bU&&!bQ.test(bR)){bR=bR.toLowerCase();for(;bS=0)){if(!bS){e.push(bV)}}else{if(bS){bR[bU]=false}}}}return false},ID:function(e){return e[1].replace(bK,"")},TAG:function(bR,e){return bR[1].replace(bK,"").toLowerCase()},CHILD:function(e){if(e[1]==="nth"){if(!e[2]){by.error(e[0])}e[2]=e[2].replace(/^\+|\s*/g,"");var 
bR=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(e[2]==="even"&&"2n"||e[2]==="odd"&&"2n+1"||!/\D/.test(e[2])&&"0n+"+e[2]||e[2]);e[2]=(bR[1]+(bR[2]||1))-0;e[3]=bR[3]-0}else{if(e[2]){by.error(e[0])}}e[0]=bI++;return e},ATTR:function(bU,bR,bS,e,bV,bW){var bT=bU[1]=bU[1].replace(bK,"");if(!bW&&bE.attrMap[bT]){bU[1]=bE.attrMap[bT]}bU[4]=(bU[4]||bU[5]||"").replace(bK,"");if(bU[2]==="~="){bU[4]=" "+bU[4]+" "}return bU},PSEUDO:function(bU,bR,bS,e,bV){if(bU[1]==="not"){if((bH.exec(bU[3])||"").length>1||/^\w/.test(bU[3])){bU[3]=by(bU[3],null,null,bR)}else{var bT=by.filter(bU[3],bR,bS,true^bV);if(!bS){e.push.apply(e,bT)}return false}}else{if(bE.match.POS.test(bU[0])||bE.match.CHILD.test(bU[0])){return true}}return bU},POS:function(e){e.unshift(true);return e}},filters:{enabled:function(e){return e.disabled===false&&e.type!=="hidden"},disabled:function(e){return e.disabled===true},checked:function(e){return e.checked===true},selected:function(e){if(e.parentNode){e.parentNode.selectedIndex}return e.selected===true},parent:function(e){return !!e.firstChild},empty:function(e){return !e.firstChild},has:function(bS,bR,e){return !!by(e[3],bS).length},header:function(e){return(/h\d/i).test(e.nodeName)},text:function(bS){var e=bS.getAttribute("type"),bR=bS.type;return bS.nodeName.toLowerCase()==="input"&&"text"===bR&&(e===bR||e===null)},radio:function(e){return e.nodeName.toLowerCase()==="input"&&"radio"===e.type},checkbox:function(e){return e.nodeName.toLowerCase()==="input"&&"checkbox"===e.type},file:function(e){return e.nodeName.toLowerCase()==="input"&&"file"===e.type},password:function(e){return e.nodeName.toLowerCase()==="input"&&"password"===e.type},submit:function(bR){var e=bR.nodeName.toLowerCase();return(e==="input"||e==="button")&&"submit"===bR.type},image:function(e){return e.nodeName.toLowerCase()==="input"&&"image"===e.type},reset:function(bR){var e=bR.nodeName.toLowerCase();return(e==="input"||e==="button")&&"reset"===bR.type},button:function(bR){var 
e=bR.nodeName.toLowerCase();return e==="input"&&"button"===bR.type||e==="button"},input:function(e){return(/input|select|textarea|button/i).test(e.nodeName)},focus:function(e){return e===e.ownerDocument.activeElement}},setFilters:{first:function(bR,e){return e===0},last:function(bS,bR,e,bT){return bR===bT.length-1},even:function(bR,e){return e%2===0},odd:function(bR,e){return e%2===1},lt:function(bS,bR,e){return bRe[3]-0},nth:function(bS,bR,e){return e[3]-0===bR},eq:function(bS,bR,e){return e[3]-0===bR}},filter:{PSEUDO:function(bS,bX,bW,bY){var e=bX[1],bR=bE.filters[e];if(bR){return bR(bS,bW,bX,bY)}else{if(e==="contains"){return(bS.textContent||bS.innerText||bw([bS])||"").indexOf(bX[3])>=0}else{if(e==="not"){var bT=bX[3];for(var bV=0,bU=bT.length;bV=0)}}},ID:function(bR,e){return bR.nodeType===1&&bR.getAttribute("id")===e},TAG:function(bR,e){return(e==="*"&&bR.nodeType===1)||!!bR.nodeName&&bR.nodeName.toLowerCase()===e},CLASS:function(bR,e){return(" "+(bR.className||bR.getAttribute("class"))+" ").indexOf(e)>-1},ATTR:function(bV,bT){var bS=bT[1],e=by.attr?by.attr(bV,bS):bE.attrHandle[bS]?bE.attrHandle[bS](bV):bV[bS]!=null?bV[bS]:bV.getAttribute(bS),bW=e+"",bU=bT[2],bR=bT[4];return e==null?bU==="!=":!bU&&by.attr?e!=null:bU==="="?bW===bR:bU==="*="?bW.indexOf(bR)>=0:bU==="~="?(" "+bW+" ").indexOf(bR)>=0:!bR?bW&&e!==false:bU==="!="?bW!==bR:bU==="^="?bW.indexOf(bR)===0:bU==="$="?bW.substr(bW.length-bR.length)===bR:bU==="|="?bW===bR||bW.substr(0,bR.length+1)===bR+"-":false},POS:function(bU,bR,bS,bV){var e=bR[2],bT=bE.setFilters[e];if(bT){return bT(bU,bS,bR,bV)}}}};var bD=bE.match.POS,bx=function(bR,e){return"\\"+(e-0+1)};for(var bz in bE.match){bE.match[bz]=new RegExp(bE.match[bz].source+(/(?![^\[]*\])(?![^\(]*\))/.source));bE.leftMatch[bz]=new RegExp(/(^(?:.|\r|\n)*?)/.source+bE.match[bz].source.replace(/\\(\d+)/g,bx))}var bF=function(bR,e){bR=Array.prototype.slice.call(bR,0);if(e){e.push.apply(e,bR);return e}return 
bR};try{Array.prototype.slice.call(av.documentElement.childNodes,0)[0].nodeType}catch(bP){bF=function(bU,bT){var bS=0,bR=bT||[];if(bL.call(bU)==="[object Array]"){Array.prototype.push.apply(bR,bU)}else{if(typeof bU.length==="number"){for(var e=bU.length;bS";e.insertBefore(bR,e.firstChild);if(av.getElementById(bS)){bE.find.ID=function(bU,bV,bW){if(typeof bV.getElementById!=="undefined"&&!bW){var bT=bV.getElementById(bU[1]);return bT?bT.id===bU[1]||typeof bT.getAttributeNode!=="undefined"&&bT.getAttributeNode("id").nodeValue===bU[1]?[bT]:L:[]}};bE.filter.ID=function(bV,bT){var bU=typeof bV.getAttributeNode!=="undefined"&&bV.getAttributeNode("id");return bV.nodeType===1&&bU&&bU.nodeValue===bT}}e.removeChild(bR);e=bR=null})();(function(){var e=av.createElement("div");e.appendChild(av.createComment(""));if(e.getElementsByTagName("*").length>0){bE.find.TAG=function(bR,bV){var bU=bV.getElementsByTagName(bR[1]);if(bR[1]==="*"){var bT=[];for(var bS=0;bU[bS];bS++){if(bU[bS].nodeType===1){bT.push(bU[bS])}}bU=bT}return bU}}e.innerHTML="";if(e.firstChild&&typeof e.firstChild.getAttribute!=="undefined"&&e.firstChild.getAttribute("href")!=="#"){bE.attrHandle.href=function(bR){return bR.getAttribute("href",2)}}e=null})();if(av.querySelectorAll){(function(){var e=by,bT=av.createElement("div"),bS="__sizzle__";bT.innerHTML="

";if(bT.querySelectorAll&&bT.querySelectorAll(".TEST").length===0){return}by=function(b4,bV,bZ,b3){bV=bV||av;if(!b3&&!by.isXML(bV)){var b2=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b4);if(b2&&(bV.nodeType===1||bV.nodeType===9)){if(b2[1]){return bF(bV.getElementsByTagName(b4),bZ)}else{if(b2[2]&&bE.find.CLASS&&bV.getElementsByClassName){return bF(bV.getElementsByClassName(b2[2]),bZ)}}}if(bV.nodeType===9){if(b4==="body"&&bV.body){return bF([bV.body],bZ)}else{if(b2&&b2[3]){var bY=bV.getElementById(b2[3]);if(bY&&bY.parentNode){if(bY.id===b2[3]){return bF([bY],bZ)}}else{return bF([],bZ)}}}try{return bF(bV.querySelectorAll(b4),bZ)}catch(b0){}}else{if(bV.nodeType===1&&bV.nodeName.toLowerCase()!=="object"){var bW=bV,bX=bV.getAttribute("id"),bU=bX||bS,b6=bV.parentNode,b5=/^\s*[+~]/.test(b4);if(!bX){bV.setAttribute("id",bU)}else{bU=bU.replace(/'/g,"\\$&")}if(b5&&b6){bV=bV.parentNode}try{if(!b5||b6){return bF(bV.querySelectorAll("[id='"+bU+"'] "+b4),bZ)}}catch(b1){}finally{if(!bX){bW.removeAttribute("id")}}}}}return e(b4,bV,bZ,b3)};for(var bR in e){by[bR]=e[bR]}bT=null})()}(function(){var e=av.documentElement,bS=e.matchesSelector||e.mozMatchesSelector||e.webkitMatchesSelector||e.msMatchesSelector;if(bS){var bU=!bS.call(av.createElement("div"),"div"),bR=false;try{bS.call(av.documentElement,"[test!='']:sizzle")}catch(bT){bR=true}by.matchesSelector=function(bW,bY){bY=bY.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!by.isXML(bW)){try{if(bR||!bE.match.PSEUDO.test(bY)&&!/!=/.test(bY)){var bV=bS.call(bW,bY);if(bV||!bU||bW.document&&bW.document.nodeType!==11){return bV}}}catch(bX){}}return by(bY,null,null,[bW]).length>0}}})();(function(){var e=av.createElement("div");e.innerHTML="
";if(!e.getElementsByClassName||e.getElementsByClassName("e").length===0){return}e.lastChild.className="e";if(e.getElementsByClassName("e").length===1){return}bE.order.splice(1,0,"CLASS");bE.find.CLASS=function(bR,bS,bT){if(typeof bS.getElementsByClassName!=="undefined"&&!bT){return bS.getElementsByClassName(bR[1])}};e=null})();function bv(bR,bW,bV,bZ,bX,bY){for(var bT=0,bS=bZ.length;bT0){bU=e;break}}}e=e[bR]}bZ[bT]=bU}}}if(av.documentElement.contains){by.contains=function(bR,e){return bR!==e&&(bR.contains?bR.contains(e):true)}}else{if(av.documentElement.compareDocumentPosition){by.contains=function(bR,e){return !!(bR.compareDocumentPosition(e)&16)}}else{by.contains=function(){return false}}}by.isXML=function(e){var bR=(e?e.ownerDocument||e:0).documentElement;return bR?bR.nodeName!=="HTML":false};var bM=function(bS,e,bW){var bV,bX=[],bU="",bY=e.nodeType?[e]:e;while((bV=bE.match.PSEUDO.exec(bS))){bU+=bV[0];bS=bS.replace(bE.match.PSEUDO,"")}bS=bE.relative[bS]?bS+"*":bS;for(var bT=0,bR=bY.length;bT0){for(bB=bA;bB=0:b.filter(e,this).length>0:this.filter(e).length>0)},closest:function(by,bx){var bv=[],bw,e,bz=this[0];if(b.isArray(by)){var bB=1;while(bz&&bz.ownerDocument&&bz!==bx){for(bw=0;bw-1:b.find.matchesSelector(bz,by)){bv.push(bz);break}else{bz=bz.parentNode;if(!bz||!bz.ownerDocument||bz===bx||bz.nodeType===11){break}}}}bv=bv.length>1?b.unique(bv):bv;return this.pushStack(bv,"closest",by)},index:function(e){if(!e){return(this[0]&&this[0].parentNode)?this.prevAll().length:-1}if(typeof e==="string"){return b.inArray(this[0],b(e))}return b.inArray(e.jquery?e[0]:e,this)},add:function(e,bv){var bx=typeof e==="string"?b(e,bv):b.makeArray(e&&e.nodeType?[e]:e),bw=b.merge(this.get(),bx);return this.pushStack(C(bx[0])||C(bw[0])?bw:b.unique(bw))},andSelf:function(){return this.add(this.prevObject)}});function C(e){return !e||!e.parentNode||e.parentNode.nodeType===11}b.each({parent:function(bv){var e=bv.parentNode;return e&&e.nodeType!==11?e:null},parents:function(e){return 
b.dir(e,"parentNode")},parentsUntil:function(bv,e,bw){return b.dir(bv,"parentNode",bw)},next:function(e){return b.nth(e,2,"nextSibling")},prev:function(e){return b.nth(e,2,"previousSibling")},nextAll:function(e){return b.dir(e,"nextSibling")},prevAll:function(e){return b.dir(e,"previousSibling")},nextUntil:function(bv,e,bw){return b.dir(bv,"nextSibling",bw)},prevUntil:function(bv,e,bw){return b.dir(bv,"previousSibling",bw)},siblings:function(e){return b.sibling(e.parentNode.firstChild,e)},children:function(e){return b.sibling(e.firstChild)},contents:function(e){return b.nodeName(e,"iframe")?e.contentDocument||e.contentWindow.document:b.makeArray(e.childNodes)}},function(e,bv){b.fn[e]=function(by,bw){var bx=b.map(this,bv,by);if(!ab.test(e)){bw=by}if(bw&&typeof bw==="string"){bx=b.filter(bw,bx)}bx=this.length>1&&!ay[e]?b.unique(bx):bx;if((this.length>1||a9.test(bw))&&aq.test(e)){bx=bx.reverse()}return this.pushStack(bx,e,P.call(arguments).join(","))}});b.extend({filter:function(bw,e,bv){if(bv){bw=":not("+bw+")"}return e.length===1?b.find.matchesSelector(e[0],bw)?[e[0]]:[]:b.find.matches(bw,e)},dir:function(bw,bv,by){var e=[],bx=bw[bv];while(bx&&bx.nodeType!==9&&(by===L||bx.nodeType!==1||!b(bx).is(by))){if(bx.nodeType===1){e.push(bx)}bx=bx[bv]}return e},nth:function(by,e,bw,bx){e=e||1;var bv=0;for(;by;by=by[bw]){if(by.nodeType===1&&++bv===e){break}}return by},sibling:function(bw,bv){var e=[];for(;bw;bw=bw.nextSibling){if(bw.nodeType===1&&bw!==bv){e.push(bw)}}return e}});function aG(bx,bw,e){bw=bw||0;if(b.isFunction(bw)){return b.grep(bx,function(bz,by){var bA=!!bw.call(bz,by,bz);return bA===e})}else{if(bw.nodeType){return b.grep(bx,function(bz,by){return(bz===bw)===e})}else{if(typeof bw==="string"){var bv=b.grep(bx,function(by){return by.nodeType===1});if(bp.test(bw)){return b.filter(bw,bv,!e)}else{bw=b.filter(bw,bv)}}}}return b.grep(bx,function(bz,by){return(b.inArray(bz,bw)>=0)===e})}function a(e){var 
bw=aR.split("|"),bv=e.createDocumentFragment();if(bv.createElement){while(bw.length){bv.createElement(bw.pop())}}return bv}var aR="abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",ag=/ jQuery\d+="(?:\d+|null)"/g,ar=/^\s+/,R=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,d=/<([\w:]+)/,w=/",""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]},ac=a(av);ax.optgroup=ax.option;ax.tbody=ax.tfoot=ax.colgroup=ax.caption=ax.thead;ax.th=ax.td;if(!b.support.htmlSerialize){ax._default=[1,"div
","
"]}b.fn.extend({text:function(e){if(b.isFunction(e)){return this.each(function(bw){var bv=b(this);bv.text(e.call(this,bw,bv.text()))})}if(typeof e!=="object"&&e!==L){return this.empty().append((this[0]&&this[0].ownerDocument||av).createTextNode(e))}return b.text(this)},wrapAll:function(e){if(b.isFunction(e)){return this.each(function(bw){b(this).wrapAll(e.call(this,bw))})}if(this[0]){var bv=b(e,this[0].ownerDocument).eq(0).clone(true);if(this[0].parentNode){bv.insertBefore(this[0])}bv.map(function(){var bw=this;while(bw.firstChild&&bw.firstChild.nodeType===1){bw=bw.firstChild}return bw}).append(this)}return this},wrapInner:function(e){if(b.isFunction(e)){return this.each(function(bv){b(this).wrapInner(e.call(this,bv))})}return this.each(function(){var bv=b(this),bw=bv.contents();if(bw.length){bw.wrapAll(e)}else{bv.append(e)}})},wrap:function(e){var bv=b.isFunction(e);return this.each(function(bw){b(this).wrapAll(bv?e.call(this,bw):e)})},unwrap:function(){return this.parent().each(function(){if(!b.nodeName(this,"body")){b(this).replaceWith(this.childNodes)}}).end()},append:function(){return this.domManip(arguments,true,function(e){if(this.nodeType===1){this.appendChild(e)}})},prepend:function(){return this.domManip(arguments,true,function(e){if(this.nodeType===1){this.insertBefore(e,this.firstChild)}})},before:function(){if(this[0]&&this[0].parentNode){return this.domManip(arguments,false,function(bv){this.parentNode.insertBefore(bv,this)})}else{if(arguments.length){var e=b.clean(arguments);e.push.apply(e,this.toArray());return this.pushStack(e,"before",arguments)}}},after:function(){if(this[0]&&this[0].parentNode){return this.domManip(arguments,false,function(bv){this.parentNode.insertBefore(bv,this.nextSibling)})}else{if(arguments.length){var e=this.pushStack(this,"after",arguments);e.push.apply(e,b.clean(arguments));return e}}},remove:function(e,bx){for(var 
bv=0,bw;(bw=this[bv])!=null;bv++){if(!e||b.filter(e,[bw]).length){if(!bx&&bw.nodeType===1){b.cleanData(bw.getElementsByTagName("*"));b.cleanData([bw])}if(bw.parentNode){bw.parentNode.removeChild(bw)}}}return this},empty:function(){for(var e=0,bv;(bv=this[e])!=null;e++){if(bv.nodeType===1){b.cleanData(bv.getElementsByTagName("*"))}while(bv.firstChild){bv.removeChild(bv.firstChild)}}return this},clone:function(bv,e){bv=bv==null?false:bv;e=e==null?bv:e;return this.map(function(){return b.clone(this,bv,e)})},html:function(bx){if(bx===L){return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(ag,""):null}else{if(typeof bx==="string"&&!ae.test(bx)&&(b.support.leadingWhitespace||!ar.test(bx))&&!ax[(d.exec(bx)||["",""])[1].toLowerCase()]){bx=bx.replace(R,"<$1>");try{for(var bw=0,bv=this.length;bw1&&bw0?this.clone(true):this).get();b(bC[bA])[bv](by);bz=bz.concat(by)}return this.pushStack(bz,e,bC.selector)}}});function bg(e){if(typeof e.getElementsByTagName!=="undefined"){return e.getElementsByTagName("*")}else{if(typeof e.querySelectorAll!=="undefined"){return e.querySelectorAll("*")}else{return[]}}}function az(e){if(e.type==="checkbox"||e.type==="radio"){e.defaultChecked=e.checked}}function E(e){var bv=(e.nodeName||"").toLowerCase();if(bv==="input"){az(e)}else{if(bv!=="script"&&typeof e.getElementsByTagName!=="undefined"){b.grep(e.getElementsByTagName("input"),az)}}}function al(e){var bv=av.createElement("div");ac.appendChild(bv);bv.innerHTML=e.outerHTML;return bv.firstChild}b.extend({clone:function(by,bA,bw){var e,bv,bx,bz=b.support.html5Clone||!ah.test("<"+by.nodeName)?by.cloneNode(true):al(by);if((!b.support.noCloneEvent||!b.support.noCloneChecked)&&(by.nodeType===1||by.nodeType===11)&&!b.isXMLDoc(by)){ai(by,bz);e=bg(by);bv=bg(bz);for(bx=0;e[bx];++bx){if(bv[bx]){ai(e[bx],bv[bx])}}}if(bA){t(by,bz);if(bw){e=bg(by);bv=bg(bz);for(bx=0;e[bx];++bx){t(e[bx],bv[bx])}}}e=bv=null;return bz},clean:function(bw,by,bH,bA){var bF;by=by||av;if(typeof 
by.createElement==="undefined"){by=by.ownerDocument||by[0]&&by[0].ownerDocument||av}var bI=[],bB;for(var bE=0,bz;(bz=bw[bE])!=null;bE++){if(typeof bz==="number"){bz+=""}if(!bz){continue}if(typeof bz==="string"){if(!W.test(bz)){bz=by.createTextNode(bz)}else{bz=bz.replace(R,"<$1>");var bK=(d.exec(bz)||["",""])[1].toLowerCase(),bx=ax[bK]||ax._default,bD=bx[0],bv=by.createElement("div");if(by===av){ac.appendChild(bv)}else{a(by).appendChild(bv)}bv.innerHTML=bx[1]+bz+bx[2];while(bD--){bv=bv.lastChild}if(!b.support.tbody){var e=w.test(bz),bC=bK==="table"&&!e?bv.firstChild&&bv.firstChild.childNodes:bx[1]===""&&!e?bv.childNodes:[];for(bB=bC.length-1;bB>=0;--bB){if(b.nodeName(bC[bB],"tbody")&&!bC[bB].childNodes.length){bC[bB].parentNode.removeChild(bC[bB])}}}if(!b.support.leadingWhitespace&&ar.test(bz)){bv.insertBefore(by.createTextNode(ar.exec(bz)[0]),bv.firstChild)}bz=bv.childNodes}}var bG;if(!b.support.appendChecked){if(bz[0]&&typeof(bG=bz.length)==="number"){for(bB=0;bB=0){return bx+"px"}}else{return bx}}}});if(!b.support.opacity){b.cssHooks.opacity={get:function(bv,e){return au.test((e&&bv.currentStyle?bv.currentStyle.filter:bv.style.filter)||"")?(parseFloat(RegExp.$1)/100)+"":e?"1":""},set:function(by,bz){var bx=by.style,bv=by.currentStyle,e=b.isNumeric(bz)?"alpha(opacity="+bz*100+")":"",bw=bv&&bv.filter||bx.filter||"";bx.zoom=1;if(bz>=1&&b.trim(bw.replace(ak,""))===""){bx.removeAttribute("filter");if(bv&&!bv.filter){return}}bx.filter=ak.test(bw)?bw.replace(ak,e):bw+" "+e}}}b(function(){if(!b.support.reliableMarginRight){b.cssHooks.marginRight={get:function(bw,bv){var e;b.swap(bw,{display:"inline-block"},function(){if(bv){e=Z(bw,"margin-right","marginRight")}else{e=bw.style.marginRight}});return e}}}});if(av.defaultView&&av.defaultView.getComputedStyle){aI=function(by,bw){var 
bv,bx,e;bw=bw.replace(z,"-$1").toLowerCase();if((bx=by.ownerDocument.defaultView)&&(e=bx.getComputedStyle(by,null))){bv=e.getPropertyValue(bw);if(bv===""&&!b.contains(by.ownerDocument.documentElement,by)){bv=b.style(by,bw)}}return bv}}if(av.documentElement.currentStyle){aX=function(bz,bw){var bA,e,by,bv=bz.currentStyle&&bz.currentStyle[bw],bx=bz.style;if(bv===null&&bx&&(by=bx[bw])){bv=by}if(!bc.test(bv)&&bn.test(bv)){bA=bx.left;e=bz.runtimeStyle&&bz.runtimeStyle.left;if(e){bz.runtimeStyle.left=bz.currentStyle.left}bx.left=bw==="fontSize"?"1em":(bv||0);bv=bx.pixelLeft+"px";bx.left=bA;if(e){bz.runtimeStyle.left=e}}return bv===""?"auto":bv}}Z=aI||aX;function p(by,bw,bv){var bA=bw==="width"?by.offsetWidth:by.offsetHeight,bz=bw==="width"?an:a1,bx=0,e=bz.length;if(bA>0){if(bv!=="border"){for(;bx)<[^<]*)*<\/script>/gi,q=/^(?:select|textarea)/i,h=/\s+/,br=/([?&])_=[^&]*/,K=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,A=b.fn.load,aa={},r={},aE,s,aV=["*/"]+["*"];try{aE=bl.href}catch(aw){aE=av.createElement("a");aE.href="";aE=aE.href}s=K.exec(aE.toLowerCase())||[];function f(e){return function(by,bA){if(typeof by!=="string"){bA=by;by="*"}if(b.isFunction(bA)){var bx=by.toLowerCase().split(h),bw=0,bz=bx.length,bv,bB,bC;for(;bw=0){var e=bw.slice(by,bw.length);bw=bw.slice(0,by)}var bx="GET";if(bz){if(b.isFunction(bz)){bA=bz;bz=L}else{if(typeof bz==="object"){bz=b.param(bz,b.ajaxSettings.traditional);bx="POST"}}}var bv=this;b.ajax({url:bw,type:bx,dataType:"html",data:bz,complete:function(bC,bB,bD){bD=bC.responseText;if(bC.isResolved()){bC.done(function(bE){bD=bE});bv.html(e?b("
").append(bD.replace(a6,"")).find(e):bD)}if(bA){bv.each(bA,[bD,bB,bC])}}});return this},serialize:function(){return b.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?b.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||q.test(this.nodeName)||aZ.test(this.type))}).map(function(e,bv){var bw=b(this).val();return bw==null?null:b.isArray(bw)?b.map(bw,function(by,bx){return{name:bv.name,value:by.replace(bs,"\r\n")}}):{name:bv.name,value:bw.replace(bs,"\r\n")}}).get()}});b.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(e,bv){b.fn[bv]=function(bw){return this.on(bv,bw)}});b.each(["get","post"],function(e,bv){b[bv]=function(bw,by,bz,bx){if(b.isFunction(by)){bx=bx||bz;bz=by;by=L}return b.ajax({type:bv,url:bw,data:by,success:bz,dataType:bx})}});b.extend({getScript:function(e,bv){return b.get(e,L,bv,"script")},getJSON:function(e,bv,bw){return b.get(e,bv,bw,"json")},ajaxSetup:function(bv,e){if(e){am(bv,b.ajaxSettings)}else{e=bv;bv=b.ajaxSettings}am(bv,e);return bv},ajaxSettings:{url:aE,isLocal:aM.test(s[1]),global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":aV},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":bb.String,"text html":true,"text json":b.parseJSON,"text xml":b.parseXML},flatOptions:{context:true,url:true}},ajaxPrefilter:f(aa),ajaxTransport:f(r),ajax:function(bz,bx){if(typeof bz==="object"){bx=bz;bz=L}bx=bx||{};var bD=b.ajaxSetup({},bx),bS=bD.context||bD,bG=bS!==bD&&(bS.nodeType||bS instanceof b)?b(bS):b.event,bR=b.Deferred(),bN=b.Callbacks("once memory"),bB=bD.statusCode||{},bC,bH={},bO={},bQ,by,bL,bE,bI,bA=0,bw,bK,bJ={readyState:0,setRequestHeader:function(bT,bU){if(!bA){var 
e=bT.toLowerCase();bT=bO[e]=bO[e]||bT;bH[bT]=bU}return this},getAllResponseHeaders:function(){return bA===2?bQ:null},getResponseHeader:function(bT){var e;if(bA===2){if(!by){by={};while((e=aD.exec(bQ))){by[e[1].toLowerCase()]=e[2]}}e=by[bT.toLowerCase()]}return e===L?null:e},overrideMimeType:function(e){if(!bA){bD.mimeType=e}return this},abort:function(e){e=e||"abort";if(bL){bL.abort(e)}bF(0,e);return this}};function bF(bZ,bU,b0,bW){if(bA===2){return}bA=2;if(bE){clearTimeout(bE)}bL=L;bQ=bW||"";bJ.readyState=bZ>0?4:0;var bT,b4,b3,bX=bU,bY=b0?bj(bD,bJ,b0):L,bV,b2;if(bZ>=200&&bZ<300||bZ===304){if(bD.ifModified){if((bV=bJ.getResponseHeader("Last-Modified"))){b.lastModified[bC]=bV}if((b2=bJ.getResponseHeader("Etag"))){b.etag[bC]=b2}}if(bZ===304){bX="notmodified";bT=true}else{try{b4=G(bD,bY);bX="success";bT=true}catch(b1){bX="parsererror";b3=b1}}}else{b3=bX;if(!bX||bZ){bX="error";if(bZ<0){bZ=0}}}bJ.status=bZ;bJ.statusText=""+(bU||bX);if(bT){bR.resolveWith(bS,[b4,bX,bJ])}else{bR.rejectWith(bS,[bJ,bX,b3])}bJ.statusCode(bB);bB=L;if(bw){bG.trigger("ajax"+(bT?"Success":"Error"),[bJ,bD,bT?b4:b3])}bN.fireWith(bS,[bJ,bX]);if(bw){bG.trigger("ajaxComplete",[bJ,bD]);if(!(--b.active)){b.event.trigger("ajaxStop")}}}bR.promise(bJ);bJ.success=bJ.done;bJ.error=bJ.fail;bJ.complete=bN.add;bJ.statusCode=function(bT){if(bT){var e;if(bA<2){for(e in bT){bB[e]=[bB[e],bT[e]]}}else{e=bT[bJ.status];bJ.then(e,e)}}return this};bD.url=((bz||bD.url)+"").replace(bq,"").replace(c,s[1]+"//");bD.dataTypes=b.trim(bD.dataType||"*").toLowerCase().split(h);if(bD.crossDomain==null){bI=K.exec(bD.url.toLowerCase());bD.crossDomain=!!(bI&&(bI[1]!=s[1]||bI[2]!=s[2]||(bI[3]||(bI[1]==="http:"?80:443))!=(s[3]||(s[1]==="http:"?80:443))))}if(bD.data&&bD.processData&&typeof bD.data!=="string"){bD.data=b.param(bD.data,bD.traditional)}aW(aa,bD,bx,bJ);if(bA===2){return 
false}bw=bD.global;bD.type=bD.type.toUpperCase();bD.hasContent=!aQ.test(bD.type);if(bw&&b.active++===0){b.event.trigger("ajaxStart")}if(!bD.hasContent){if(bD.data){bD.url+=(M.test(bD.url)?"&":"?")+bD.data;delete bD.data}bC=bD.url;if(bD.cache===false){var bv=b.now(),bP=bD.url.replace(br,"$1_="+bv);bD.url=bP+((bP===bD.url)?(M.test(bD.url)?"&":"?")+"_="+bv:"")}}if(bD.data&&bD.hasContent&&bD.contentType!==false||bx.contentType){bJ.setRequestHeader("Content-Type",bD.contentType)}if(bD.ifModified){bC=bC||bD.url;if(b.lastModified[bC]){bJ.setRequestHeader("If-Modified-Since",b.lastModified[bC])}if(b.etag[bC]){bJ.setRequestHeader("If-None-Match",b.etag[bC])}}bJ.setRequestHeader("Accept",bD.dataTypes[0]&&bD.accepts[bD.dataTypes[0]]?bD.accepts[bD.dataTypes[0]]+(bD.dataTypes[0]!=="*"?", "+aV+"; q=0.01":""):bD.accepts["*"]);for(bK in bD.headers){bJ.setRequestHeader(bK,bD.headers[bK])}if(bD.beforeSend&&(bD.beforeSend.call(bS,bJ,bD)===false||bA===2)){bJ.abort();return false}for(bK in {success:1,error:1,complete:1}){bJ[bK](bD[bK])}bL=aW(r,bD,bx,bJ);if(!bL){bF(-1,"No Transport")}else{bJ.readyState=1;if(bw){bG.trigger("ajaxSend",[bJ,bD])}if(bD.async&&bD.timeout>0){bE=setTimeout(function(){bJ.abort("timeout")},bD.timeout)}try{bA=1;bL.send(bH,bF)}catch(bM){if(bA<2){bF(-1,bM)}else{throw bM}}}return bJ},param:function(e,bw){var bv=[],by=function(bz,bA){bA=b.isFunction(bA)?bA():bA;bv[bv.length]=encodeURIComponent(bz)+"="+encodeURIComponent(bA)};if(bw===L){bw=b.ajaxSettings.traditional}if(b.isArray(e)||(e.jquery&&!b.isPlainObject(e))){b.each(e,function(){by(this.name,this.value)})}else{for(var bx in e){v(bx,e[bx],bw,by)}}return bv.join("&").replace(k,"+")}});function v(bw,by,bv,bx){if(b.isArray(by)){b.each(by,function(bA,bz){if(bv||ap.test(bw)){bx(bw,bz)}else{v(bw+"["+(typeof bz==="object"||b.isArray(bz)?bA:"")+"]",bz,bv,bx)}})}else{if(!bv&&by!=null&&typeof by==="object"){for(var e in by){v(bw+"["+e+"]",by[e],bv,bx)}}else{bx(bw,by)}}}b.extend({active:0,lastModified:{},etag:{}});function 
bj(bD,bC,bz){var bv=bD.contents,bB=bD.dataTypes,bw=bD.responseFields,by,bA,bx,e;for(bA in bw){if(bA in bz){bC[bw[bA]]=bz[bA]}}while(bB[0]==="*"){bB.shift();if(by===L){by=bD.mimeType||bC.getResponseHeader("content-type")}}if(by){for(bA in bv){if(bv[bA]&&bv[bA].test(by)){bB.unshift(bA);break}}}if(bB[0] in bz){bx=bB[0]}else{for(bA in bz){if(!bB[0]||bD.converters[bA+" "+bB[0]]){bx=bA;break}if(!e){e=bA}}bx=bx||e}if(bx){if(bx!==bB[0]){bB.unshift(bx)}return bz[bx]}}function G(bH,bz){if(bH.dataFilter){bz=bH.dataFilter(bz,bH.dataType)}var bD=bH.dataTypes,bG={},bA,bE,bw=bD.length,bB,bC=bD[0],bx,by,bF,bv,e;for(bA=1;bA=bw.duration+this.startTime){this.now=this.end;this.pos=this.state=1;this.update();bw.animatedProperties[this.prop]=true;for(bA in bw.animatedProperties){if(bw.animatedProperties[bA]!==true){e=false}}if(e){if(bw.overflow!=null&&!b.support.shrinkWrapBlocks){b.each(["","X","Y"],function(bC,bD){bz.style["overflow"+bD]=bw.overflow[bC]})}if(bw.hide){b(bz).hide()}if(bw.hide||bw.show){for(bA in bw.animatedProperties){b.style(bz,bA,bw.orig[bA]);b.removeData(bz,"fxshow"+bA,true);b.removeData(bz,"toggle"+bA,true)}}bv=bw.complete;if(bv){bw.complete=false;bv.call(bz)}}return false}else{if(bw.duration==Infinity){this.now=bx}else{bB=bx-this.startTime;this.state=bB/bw.duration;this.pos=b.easing[bw.animatedProperties[this.prop]](this.state,bB,0,1,bw.duration);this.now=this.start+((this.end-this.start)*this.pos)}this.update()}return true}};b.extend(b.fx,{tick:function(){var bw,bv=b.timers,e=0;for(;e").appendTo(e),bw=bv.css("display");bv.remove();if(bw==="none"||bw===""){if(!a8){a8=av.createElement("iframe");a8.frameBorder=a8.width=a8.height=0}e.appendChild(a8);if(!m||!a8.createElement){m=(a8.contentWindow||a8.contentDocument).document;m.write((av.compatMode==="CSS1Compat"?"":"")+"");m.close()}bv=m.createElement(bx);m.body.appendChild(bv);bw=b.css(bv,"display");e.removeChild(a8)}Q[bx]=bw}return Q[bx]}var V=/^t(?:able|d|h)$/i,ad=/^(?:body|html)$/i;if("getBoundingClientRect" in 
av.documentElement){b.fn.offset=function(bI){var by=this[0],bB;if(bI){return this.each(function(e){b.offset.setOffset(this,bI,e)})}if(!by||!by.ownerDocument){return null}if(by===by.ownerDocument.body){return b.offset.bodyOffset(by)}try{bB=by.getBoundingClientRect()}catch(bF){}var bH=by.ownerDocument,bw=bH.documentElement;if(!bB||!b.contains(bw,by)){return bB?{top:bB.top,left:bB.left}:{top:0,left:0}}var bC=bH.body,bD=aK(bH),bA=bw.clientTop||bC.clientTop||0,bE=bw.clientLeft||bC.clientLeft||0,bv=bD.pageYOffset||b.support.boxModel&&bw.scrollTop||bC.scrollTop,bz=bD.pageXOffset||b.support.boxModel&&bw.scrollLeft||bC.scrollLeft,bG=bB.top+bv-bA,bx=bB.left+bz-bE;return{top:bG,left:bx}}}else{b.fn.offset=function(bF){var bz=this[0];if(bF){return this.each(function(bG){b.offset.setOffset(this,bF,bG)})}if(!bz||!bz.ownerDocument){return null}if(bz===bz.ownerDocument.body){return b.offset.bodyOffset(bz)}var bC,bw=bz.offsetParent,bv=bz,bE=bz.ownerDocument,bx=bE.documentElement,bA=bE.body,bB=bE.defaultView,e=bB?bB.getComputedStyle(bz,null):bz.currentStyle,bD=bz.offsetTop,by=bz.offsetLeft;while((bz=bz.parentNode)&&bz!==bA&&bz!==bx){if(b.support.fixedPosition&&e.position==="fixed"){break}bC=bB?bB.getComputedStyle(bz,null):bz.currentStyle;bD-=bz.scrollTop;by-=bz.scrollLeft;if(bz===bw){bD+=bz.offsetTop;by+=bz.offsetLeft;if(b.support.doesNotAddBorder&&!(b.support.doesAddBorderForTableAndCells&&V.test(bz.nodeName))){bD+=parseFloat(bC.borderTopWidth)||0;by+=parseFloat(bC.borderLeftWidth)||0}bv=bw;bw=bz.offsetParent}if(b.support.subtractsBorderForOverflowNotVisible&&bC.overflow!=="visible"){bD+=parseFloat(bC.borderTopWidth)||0;by+=parseFloat(bC.borderLeftWidth)||0}e=bC}if(e.position==="relative"||e.position==="static"){bD+=bA.offsetTop;by+=bA.offsetLeft}if(b.support.fixedPosition&&e.position==="fixed"){bD+=Math.max(bx.scrollTop,bA.scrollTop);by+=Math.max(bx.scrollLeft,bA.scrollLeft)}return{top:bD,left:by}}}b.offset={bodyOffset:function(e){var 
bw=e.offsetTop,bv=e.offsetLeft;if(b.support.doesNotIncludeMarginInBodyOffset){bw+=parseFloat(b.css(e,"marginTop"))||0;bv+=parseFloat(b.css(e,"marginLeft"))||0}return{top:bw,left:bv}},setOffset:function(bx,bG,bA){var bB=b.css(bx,"position");if(bB==="static"){bx.style.position="relative"}var bz=b(bx),bv=bz.offset(),e=b.css(bx,"top"),bE=b.css(bx,"left"),bF=(bB==="absolute"||bB==="fixed")&&b.inArray("auto",[e,bE])>-1,bD={},bC={},bw,by;if(bF){bC=bz.position();bw=bC.top;by=bC.left}else{bw=parseFloat(e)||0;by=parseFloat(bE)||0}if(b.isFunction(bG)){bG=bG.call(bx,bA,bv)}if(bG.top!=null){bD.top=(bG.top-bv.top)+bw}if(bG.left!=null){bD.left=(bG.left-bv.left)+by}if("using" in bG){bG.using.call(bx,bD)}else{bz.css(bD)}}};b.fn.extend({position:function(){if(!this[0]){return null}var bw=this[0],bv=this.offsetParent(),bx=this.offset(),e=ad.test(bv[0].nodeName)?{top:0,left:0}:bv.offset();bx.top-=parseFloat(b.css(bw,"marginTop"))||0;bx.left-=parseFloat(b.css(bw,"marginLeft"))||0;e.top+=parseFloat(b.css(bv[0],"borderTopWidth"))||0;e.left+=parseFloat(b.css(bv[0],"borderLeftWidth"))||0;return{top:bx.top-e.top,left:bx.left-e.left}},offsetParent:function(){return this.map(function(){var e=this.offsetParent||av.body;while(e&&(!ad.test(e.nodeName)&&b.css(e,"position")==="static")){e=e.offsetParent}return e})}});b.each(["Left","Top"],function(bv,e){var bw="scroll"+e;b.fn[bw]=function(bz){var bx,by;if(bz===L){bx=this[0];if(!bx){return null}by=aK(bx);return by?("pageXOffset" in by)?by[bv?"pageYOffset":"pageXOffset"]:b.support.boxModel&&by.document.documentElement[bw]||by.document.body[bw]:bx[bw]}return this.each(function(){by=aK(this);if(by){by.scrollTo(!bv?bz:b(by).scrollLeft(),bv?bz:b(by).scrollTop())}else{this[bw]=bz}})}});function aK(e){return b.isWindow(e)?e:e.nodeType===9?e.defaultView||e.parentWindow:false}b.each(["Height","Width"],function(bv,e){var bw=e.toLowerCase();b.fn["inner"+e]=function(){var bx=this[0];return 
bx?bx.style?parseFloat(b.css(bx,bw,"padding")):this[bw]():null};b.fn["outer"+e]=function(by){var bx=this[0];return bx?bx.style?parseFloat(b.css(bx,bw,by?"margin":"border")):this[bw]():null};b.fn[bw]=function(bz){var bA=this[0];if(!bA){return bz==null?null:this}if(b.isFunction(bz)){return this.each(function(bE){var bD=b(this);bD[bw](bz.call(this,bE,bD[bw]()))})}if(b.isWindow(bA)){var bB=bA.document.documentElement["client"+e],bx=bA.document.body;return bA.document.compatMode==="CSS1Compat"&&bB||bx&&bx["client"+e]||bB}else{if(bA.nodeType===9){return Math.max(bA.documentElement["client"+e],bA.body["scroll"+e],bA.documentElement["scroll"+e],bA.body["offset"+e],bA.documentElement["offset"+e])}else{if(bz===L){var bC=b.css(bA,bw),by=parseFloat(bC);return b.isNumeric(by)?by:bC}else{return this.css(bw,typeof bz==="string"?bz:bz+"px")}}}}});bb.jQuery=bb.$=b;if(typeof define==="function"&&define.amd&&define.amd.jQuery){define("jquery",[],function(){return b})}})(window);/*! + * jQuery UI 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. 
+ * http://jquery.org/license + * + * http://docs.jquery.com/UI + */ +(function(a,d){a.ui=a.ui||{};if(a.ui.version){return}a.extend(a.ui,{version:"1.8.18",keyCode:{ALT:18,BACKSPACE:8,CAPS_LOCK:20,COMMA:188,COMMAND:91,COMMAND_LEFT:91,COMMAND_RIGHT:93,CONTROL:17,DELETE:46,DOWN:40,END:35,ENTER:13,ESCAPE:27,HOME:36,INSERT:45,LEFT:37,MENU:93,NUMPAD_ADD:107,NUMPAD_DECIMAL:110,NUMPAD_DIVIDE:111,NUMPAD_ENTER:108,NUMPAD_MULTIPLY:106,NUMPAD_SUBTRACT:109,PAGE_DOWN:34,PAGE_UP:33,PERIOD:190,RIGHT:39,SHIFT:16,SPACE:32,TAB:9,UP:38,WINDOWS:91}});a.fn.extend({propAttr:a.fn.prop||a.fn.attr,_focus:a.fn.focus,focus:function(e,f){return typeof e==="number"?this.each(function(){var g=this;setTimeout(function(){a(g).focus();if(f){f.call(g)}},e)}):this._focus.apply(this,arguments)},scrollParent:function(){var e;if((a.browser.msie&&(/(static|relative)/).test(this.css("position")))||(/absolute/).test(this.css("position"))){e=this.parents().filter(function(){return(/(relative|absolute|fixed)/).test(a.curCSS(this,"position",1))&&(/(auto|scroll)/).test(a.curCSS(this,"overflow",1)+a.curCSS(this,"overflow-y",1)+a.curCSS(this,"overflow-x",1))}).eq(0)}else{e=this.parents().filter(function(){return(/(auto|scroll)/).test(a.curCSS(this,"overflow",1)+a.curCSS(this,"overflow-y",1)+a.curCSS(this,"overflow-x",1))}).eq(0)}return(/fixed/).test(this.css("position"))||!e.length?a(document):e},zIndex:function(h){if(h!==d){return this.css("zIndex",h)}if(this.length){var f=a(this[0]),e,g;while(f.length&&f[0]!==document){e=f.css("position");if(e==="absolute"||e==="relative"||e==="fixed"){g=parseInt(f.css("zIndex"),10);if(!isNaN(g)&&g!==0){return g}}f=f.parent()}}return 0},disableSelection:function(){return this.bind((a.support.selectstart?"selectstart":"mousedown")+".ui-disableSelection",function(e){e.preventDefault()})},enableSelection:function(){return this.unbind(".ui-disableSelection")}});a.each(["Width","Height"],function(g,e){var 
f=e==="Width"?["Left","Right"]:["Top","Bottom"],h=e.toLowerCase(),k={innerWidth:a.fn.innerWidth,innerHeight:a.fn.innerHeight,outerWidth:a.fn.outerWidth,outerHeight:a.fn.outerHeight};function j(m,l,i,n){a.each(f,function(){l-=parseFloat(a.curCSS(m,"padding"+this,true))||0;if(i){l-=parseFloat(a.curCSS(m,"border"+this+"Width",true))||0}if(n){l-=parseFloat(a.curCSS(m,"margin"+this,true))||0}});return l}a.fn["inner"+e]=function(i){if(i===d){return k["inner"+e].call(this)}return this.each(function(){a(this).css(h,j(this,i)+"px")})};a.fn["outer"+e]=function(i,l){if(typeof i!=="number"){return k["outer"+e].call(this,i)}return this.each(function(){a(this).css(h,j(this,i,true,l)+"px")})}});function c(g,e){var j=g.nodeName.toLowerCase();if("area"===j){var i=g.parentNode,h=i.name,f;if(!g.href||!h||i.nodeName.toLowerCase()!=="map"){return false}f=a("img[usemap=#"+h+"]")[0];return !!f&&b(f)}return(/input|select|textarea|button|object/.test(j)?!g.disabled:"a"==j?g.href||e:e)&&b(g)}function b(e){return !a(e).parents().andSelf().filter(function(){return a.curCSS(this,"visibility")==="hidden"||a.expr.filters.hidden(this)}).length}a.extend(a.expr[":"],{data:function(g,f,e){return !!a.data(g,e[3])},focusable:function(e){return c(e,!isNaN(a.attr(e,"tabindex")))},tabbable:function(g){var e=a.attr(g,"tabindex"),f=isNaN(e);return(f||e>=0)&&c(g,!f)}});a(function(){var e=document.body,f=e.appendChild(f=document.createElement("div"));f.offsetHeight;a.extend(f.style,{minHeight:"100px",height:"auto",padding:0,borderWidth:0});a.support.minHeight=f.offsetHeight===100;a.support.selectstart="onselectstart" in f;e.removeChild(f).style.display="none"});a.extend(a.ui,{plugin:{add:function(f,g,j){var h=a.ui[f].prototype;for(var e in j){h.plugins[e]=h.plugins[e]||[];h.plugins[e].push([g,j[e]])}},call:function(e,g,f){var j=e.plugins[g];if(!j||!e.element[0].parentNode){return}for(var h=0;h0){return true}h[e]=1;g=(h[e]>0);h[e]=0;return 
g},isOverAxis:function(f,e,g){return(f>e)&&(f<(e+g))},isOver:function(j,f,i,h,e,g){return a.ui.isOverAxis(j,i,e)&&a.ui.isOverAxis(f,h,g)}})})(jQuery);/*! + * jQuery UI Widget 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * http://docs.jquery.com/UI/Widget + */ +(function(b,d){if(b.cleanData){var c=b.cleanData;b.cleanData=function(f){for(var g=0,h;(h=f[g])!=null;g++){try{b(h).triggerHandler("remove")}catch(j){}}c(f)}}else{var a=b.fn.remove;b.fn.remove=function(e,f){return this.each(function(){if(!f){if(!e||b.filter(e,[this]).length){b("*",this).add([this]).each(function(){try{b(this).triggerHandler("remove")}catch(g){}})}}return a.call(b(this),e,f)})}}b.widget=function(f,h,e){var g=f.split(".")[0],j;f=f.split(".")[1];j=g+"-"+f;if(!e){e=h;h=b.Widget}b.expr[":"][j]=function(k){return !!b.data(k,f)};b[g]=b[g]||{};b[g][f]=function(k,l){if(arguments.length){this._createWidget(k,l)}};var i=new h();i.options=b.extend(true,{},i.options);b[g][f].prototype=b.extend(true,i,{namespace:g,widgetName:f,widgetEventPrefix:b[g][f].prototype.widgetEventPrefix||f,widgetBaseClass:j},e);b.widget.bridge(f,b[g][f])};b.widget.bridge=function(f,e){b.fn[f]=function(i){var g=typeof i==="string",h=Array.prototype.slice.call(arguments,1),j=this;i=!g&&h.length?b.extend.apply(null,[true,i].concat(h)):i;if(g&&i.charAt(0)==="_"){return j}if(g){this.each(function(){var k=b.data(this,f),l=k&&b.isFunction(k[i])?k[i].apply(k,h):k;if(l!==k&&l!==d){j=l;return false}})}else{this.each(function(){var k=b.data(this,f);if(k){k.option(i||{})._init()}else{b.data(this,f,new e(i,this))}})}return 
j}};b.Widget=function(e,f){if(arguments.length){this._createWidget(e,f)}};b.Widget.prototype={widgetName:"widget",widgetEventPrefix:"",options:{disabled:false},_createWidget:function(f,g){b.data(g,this.widgetName,this);this.element=b(g);this.options=b.extend(true,{},this.options,this._getCreateOptions(),f);var e=this;this.element.bind("remove."+this.widgetName,function(){e.destroy()});this._create();this._trigger("create");this._init()},_getCreateOptions:function(){return b.metadata&&b.metadata.get(this.element[0])[this.widgetName]},_create:function(){},_init:function(){},destroy:function(){this.element.unbind("."+this.widgetName).removeData(this.widgetName);this.widget().unbind("."+this.widgetName).removeAttr("aria-disabled").removeClass(this.widgetBaseClass+"-disabled ui-state-disabled")},widget:function(){return this.element},option:function(f,g){var e=f;if(arguments.length===0){return b.extend({},this.options)}if(typeof f==="string"){if(g===d){return this.options[f]}e={};e[f]=g}this._setOptions(e);return this},_setOptions:function(f){var e=this;b.each(f,function(g,h){e._setOption(g,h)});return this},_setOption:function(e,f){this.options[e]=f;if(e==="disabled"){this.widget()[f?"addClass":"removeClass"](this.widgetBaseClass+"-disabled ui-state-disabled").attr("aria-disabled",f)}return this},enable:function(){return this._setOption("disabled",false)},disable:function(){return this._setOption("disabled",true)},_trigger:function(e,f,g){var j,i,h=this.options[e];g=g||{};f=b.Event(f);f.type=(e===this.widgetEventPrefix?e:this.widgetEventPrefix+e).toLowerCase();f.target=this.element[0];i=f.originalEvent;if(i){for(j in i){if(!(j in f)){f[j]=i[j]}}}this.element.trigger(f,g);return !(b.isFunction(h)&&h.call(this.element[0],f,g)===false||f.isDefaultPrevented())}}})(jQuery);/*! + * jQuery UI Mouse 1.8.18 + * + * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about) + * Dual licensed under the MIT or GPL Version 2 licenses. 
+ * http://jquery.org/license + * + * http://docs.jquery.com/UI/Mouse + * + * Depends: + * jquery.ui.widget.js + */ +(function(b,c){var a=false;b(document).mouseup(function(d){a=false});b.widget("ui.mouse",{options:{cancel:":input,option",distance:1,delay:0},_mouseInit:function(){var d=this;this.element.bind("mousedown."+this.widgetName,function(e){return d._mouseDown(e)}).bind("click."+this.widgetName,function(e){if(true===b.data(e.target,d.widgetName+".preventClickEvent")){b.removeData(e.target,d.widgetName+".preventClickEvent");e.stopImmediatePropagation();return false}});this.started=false},_mouseDestroy:function(){this.element.unbind("."+this.widgetName)},_mouseDown:function(f){if(a){return}(this._mouseStarted&&this._mouseUp(f));this._mouseDownEvent=f;var e=this,g=(f.which==1),d=(typeof this.options.cancel=="string"&&f.target.nodeName?b(f.target).closest(this.options.cancel).length:false);if(!g||d||!this._mouseCapture(f)){return true}this.mouseDelayMet=!this.options.delay;if(!this.mouseDelayMet){this._mouseDelayTimer=setTimeout(function(){e.mouseDelayMet=true},this.options.delay)}if(this._mouseDistanceMet(f)&&this._mouseDelayMet(f)){this._mouseStarted=(this._mouseStart(f)!==false);if(!this._mouseStarted){f.preventDefault();return true}}if(true===b.data(f.target,this.widgetName+".preventClickEvent")){b.removeData(f.target,this.widgetName+".preventClickEvent")}this._mouseMoveDelegate=function(h){return e._mouseMove(h)};this._mouseUpDelegate=function(h){return e._mouseUp(h)};b(document).bind("mousemove."+this.widgetName,this._mouseMoveDelegate).bind("mouseup."+this.widgetName,this._mouseUpDelegate);f.preventDefault();a=true;return true},_mouseMove:function(d){if(b.browser.msie&&!(document.documentMode>=9)&&!d.button){return this._mouseUp(d)}if(this._mouseStarted){this._mouseDrag(d);return 
d.preventDefault()}if(this._mouseDistanceMet(d)&&this._mouseDelayMet(d)){this._mouseStarted=(this._mouseStart(this._mouseDownEvent,d)!==false);(this._mouseStarted?this._mouseDrag(d):this._mouseUp(d))}return !this._mouseStarted},_mouseUp:function(d){b(document).unbind("mousemove."+this.widgetName,this._mouseMoveDelegate).unbind("mouseup."+this.widgetName,this._mouseUpDelegate);if(this._mouseStarted){this._mouseStarted=false;if(d.target==this._mouseDownEvent.target){b.data(d.target,this.widgetName+".preventClickEvent",true)}this._mouseStop(d)}return false},_mouseDistanceMet:function(d){return(Math.max(Math.abs(this._mouseDownEvent.pageX-d.pageX),Math.abs(this._mouseDownEvent.pageY-d.pageY))>=this.options.distance)},_mouseDelayMet:function(d){return this.mouseDelayMet},_mouseStart:function(d){},_mouseDrag:function(d){},_mouseStop:function(d){},_mouseCapture:function(d){return true}})})(jQuery);(function(c,d){c.widget("ui.resizable",c.ui.mouse,{widgetEventPrefix:"resize",options:{alsoResize:false,animate:false,animateDuration:"slow",animateEasing:"swing",aspectRatio:false,autoHide:false,containment:false,ghost:false,grid:false,handles:"e,s,se",helper:false,maxHeight:null,maxWidth:null,minHeight:10,minWidth:10,zIndex:1000},_create:function(){var f=this,k=this.options;this.element.addClass("ui-resizable");c.extend(this,{_aspectRatio:!!(k.aspectRatio),aspectRatio:k.aspectRatio,originalElement:this.element,_proportionallyResizeElements:[],_helper:k.helper||k.ghost||k.animate?k.helper||"ui-resizable-helper":null});if(this.element[0].nodeName.match(/canvas|textarea|input|select|button|img/i)){this.element.wrap(c('
').css({position:this.element.css("position"),width:this.element.outerWidth(),height:this.element.outerHeight(),top:this.element.css("top"),left:this.element.css("left")}));this.element=this.element.parent().data("resizable",this.element.data("resizable"));this.elementIsWrapper=true;this.element.css({marginLeft:this.originalElement.css("marginLeft"),marginTop:this.originalElement.css("marginTop"),marginRight:this.originalElement.css("marginRight"),marginBottom:this.originalElement.css("marginBottom")});this.originalElement.css({marginLeft:0,marginTop:0,marginRight:0,marginBottom:0});this.originalResizeStyle=this.originalElement.css("resize");this.originalElement.css("resize","none");this._proportionallyResizeElements.push(this.originalElement.css({position:"static",zoom:1,display:"block"}));this.originalElement.css({margin:this.originalElement.css("margin")});this._proportionallyResize()}this.handles=k.handles||(!c(".ui-resizable-handle",this.element).length?"e,s,se":{n:".ui-resizable-n",e:".ui-resizable-e",s:".ui-resizable-s",w:".ui-resizable-w",se:".ui-resizable-se",sw:".ui-resizable-sw",ne:".ui-resizable-ne",nw:".ui-resizable-nw"});if(this.handles.constructor==String){if(this.handles=="all"){this.handles="n,e,s,w,se,sw,ne,nw"}var l=this.handles.split(",");this.handles={};for(var g=0;g
');if(/sw|se|ne|nw/.test(j)){h.css({zIndex:++k.zIndex})}if("se"==j){h.addClass("ui-icon ui-icon-gripsmall-diagonal-se")}this.handles[j]=".ui-resizable-"+j;this.element.append(h)}}this._renderAxis=function(q){q=q||this.element;for(var n in this.handles){if(this.handles[n].constructor==String){this.handles[n]=c(this.handles[n],this.element).show()}if(this.elementIsWrapper&&this.originalElement[0].nodeName.match(/textarea|input|select|button/i)){var o=c(this.handles[n],this.element),p=0;p=/sw|ne|nw|se|n|s/.test(n)?o.outerHeight():o.outerWidth();var m=["padding",/ne|nw|n/.test(n)?"Top":/se|sw|s/.test(n)?"Bottom":/^e$/.test(n)?"Right":"Left"].join("");q.css(m,p);this._proportionallyResize()}if(!c(this.handles[n]).length){continue}}};this._renderAxis(this.element);this._handles=c(".ui-resizable-handle",this.element).disableSelection();this._handles.mouseover(function(){if(!f.resizing){if(this.className){var i=this.className.match(/ui-resizable-(se|sw|ne|nw|n|e|s|w)/i)}f.axis=i&&i[1]?i[1]:"se"}});if(k.autoHide){this._handles.hide();c(this.element).addClass("ui-resizable-autohide").hover(function(){if(k.disabled){return}c(this).removeClass("ui-resizable-autohide");f._handles.show()},function(){if(k.disabled){return}if(!f.resizing){c(this).addClass("ui-resizable-autohide");f._handles.hide()}})}this._mouseInit()},destroy:function(){this._mouseDestroy();var e=function(g){c(g).removeClass("ui-resizable ui-resizable-disabled ui-resizable-resizing").removeData("resizable").unbind(".resizable").find(".ui-resizable-handle").remove()};if(this.elementIsWrapper){e(this.element);var f=this.element;f.after(this.originalElement.css({position:f.css("position"),width:f.outerWidth(),height:f.outerHeight(),top:f.css("top"),left:f.css("left")})).remove()}this.originalElement.css("resize",this.originalResizeStyle);e(this.originalElement);return this},_mouseCapture:function(f){var g=false;for(var e in this.handles){if(c(this.handles[e])[0]==f.target){g=true}}return 
!this.options.disabled&&g},_mouseStart:function(g){var j=this.options,f=this.element.position(),e=this.element;this.resizing=true;this.documentScroll={top:c(document).scrollTop(),left:c(document).scrollLeft()};if(e.is(".ui-draggable")||(/absolute/).test(e.css("position"))){e.css({position:"absolute",top:f.top,left:f.left})}this._renderProxy();var k=b(this.helper.css("left")),h=b(this.helper.css("top"));if(j.containment){k+=c(j.containment).scrollLeft()||0;h+=c(j.containment).scrollTop()||0}this.offset=this.helper.offset();this.position={left:k,top:h};this.size=this._helper?{width:e.outerWidth(),height:e.outerHeight()}:{width:e.width(),height:e.height()};this.originalSize=this._helper?{width:e.outerWidth(),height:e.outerHeight()}:{width:e.width(),height:e.height()};this.originalPosition={left:k,top:h};this.sizeDiff={width:e.outerWidth()-e.width(),height:e.outerHeight()-e.height()};this.originalMousePosition={left:g.pageX,top:g.pageY};this.aspectRatio=(typeof j.aspectRatio=="number")?j.aspectRatio:((this.originalSize.width/this.originalSize.height)||1);var i=c(".ui-resizable-"+this.axis).css("cursor");c("body").css("cursor",i=="auto"?this.axis+"-resize":i);e.addClass("ui-resizable-resizing");this._propagate("start",g);return true},_mouseDrag:function(e){var h=this.helper,g=this.options,m={},q=this,j=this.originalMousePosition,n=this.axis;var r=(e.pageX-j.left)||0,p=(e.pageY-j.top)||0;var i=this._change[n];if(!i){return false}var l=i.apply(this,[e,r,p]),k=c.browser.msie&&c.browser.version<7,f=this.sizeDiff;this._updateVirtualBoundaries(e.shiftKey);if(this._aspectRatio||e.shiftKey){l=this._updateRatio(l,e)}l=this._respectSize(l,e);this._propagate("resize",e);h.css({top:this.position.top+"px",left:this.position.left+"px",width:this.size.width+"px",height:this.size.height+"px"});if(!this._helper&&this._proportionallyResizeElements.length){this._proportionallyResize()}this._updateCache(l);this._trigger("resize",e,this.ui());return 
false},_mouseStop:function(h){this.resizing=false;var i=this.options,m=this;if(this._helper){var g=this._proportionallyResizeElements,e=g.length&&(/textarea/i).test(g[0].nodeName),f=e&&c.ui.hasScroll(g[0],"left")?0:m.sizeDiff.height,k=e?0:m.sizeDiff.width;var n={width:(m.helper.width()-k),height:(m.helper.height()-f)},j=(parseInt(m.element.css("left"),10)+(m.position.left-m.originalPosition.left))||null,l=(parseInt(m.element.css("top"),10)+(m.position.top-m.originalPosition.top))||null;if(!i.animate){this.element.css(c.extend(n,{top:l,left:j}))}m.helper.height(m.size.height);m.helper.width(m.size.width);if(this._helper&&!i.animate){this._proportionallyResize()}}c("body").css("cursor","auto");this.element.removeClass("ui-resizable-resizing");this._propagate("stop",h);if(this._helper){this.helper.remove()}return false},_updateVirtualBoundaries:function(g){var j=this.options,i,h,f,k,e;e={minWidth:a(j.minWidth)?j.minWidth:0,maxWidth:a(j.maxWidth)?j.maxWidth:Infinity,minHeight:a(j.minHeight)?j.minHeight:0,maxHeight:a(j.maxHeight)?j.maxHeight:Infinity};if(this._aspectRatio||g){i=e.minHeight*this.aspectRatio;f=e.minWidth/this.aspectRatio;h=e.maxHeight*this.aspectRatio;k=e.maxWidth/this.aspectRatio;if(i>e.minWidth){e.minWidth=i}if(f>e.minHeight){e.minHeight=f}if(hl.width),s=a(l.height)&&i.minHeight&&(i.minHeight>l.height);if(h){l.width=i.minWidth}if(s){l.height=i.minHeight}if(t){l.width=i.maxWidth}if(m){l.height=i.maxHeight}var f=this.originalPosition.left+this.originalSize.width,p=this.position.top+this.size.height;var k=/sw|nw|w/.test(q),e=/nw|ne|n/.test(q);if(h&&k){l.left=f-i.minWidth}if(t&&k){l.left=f-i.maxWidth}if(s&&e){l.top=p-i.minHeight}if(m&&e){l.top=p-i.maxHeight}var n=!l.width&&!l.height;if(n&&!l.left&&l.top){l.top=null}else{if(n&&!l.top&&l.left){l.left=null}}return l},_proportionallyResize:function(){var k=this.options;if(!this._proportionallyResizeElements.length){return}var g=this.helper||this.element;for(var f=0;f');var 
e=c.browser.msie&&c.browser.version<7,g=(e?1:0),h=(e?2:-1);this.helper.addClass(this._helper).css({width:this.element.outerWidth()+h,height:this.element.outerHeight()+h,position:"absolute",left:this.elementOffset.left-g+"px",top:this.elementOffset.top-g+"px",zIndex:++i.zIndex});this.helper.appendTo("body").disableSelection()}else{this.helper=this.element}},_change:{e:function(g,f,e){return{width:this.originalSize.width+f}},w:function(h,f,e){var j=this.options,g=this.originalSize,i=this.originalPosition;return{left:i.left+f,width:g.width-f}},n:function(h,f,e){var j=this.options,g=this.originalSize,i=this.originalPosition;return{top:i.top+e,height:g.height-e}},s:function(g,f,e){return{height:this.originalSize.height+e}},se:function(g,f,e){return c.extend(this._change.s.apply(this,arguments),this._change.e.apply(this,[g,f,e]))},sw:function(g,f,e){return c.extend(this._change.s.apply(this,arguments),this._change.w.apply(this,[g,f,e]))},ne:function(g,f,e){return c.extend(this._change.n.apply(this,arguments),this._change.e.apply(this,[g,f,e]))},nw:function(g,f,e){return c.extend(this._change.n.apply(this,arguments),this._change.w.apply(this,[g,f,e]))}},_propagate:function(f,e){c.ui.plugin.call(this,f,[e,this.ui()]);(f!="resize"&&this._trigger(f,e,this.ui()))},plugins:{},ui:function(){return{originalElement:this.originalElement,element:this.element,helper:this.helper,position:this.position,size:this.size,originalSize:this.originalSize,originalPosition:this.originalPosition}}});c.extend(c.ui.resizable,{version:"1.8.18"});c.ui.plugin.add("resizable","alsoResize",{start:function(f,g){var e=c(this).data("resizable"),i=e.options;var h=function(j){c(j).each(function(){var 
k=c(this);k.data("resizable-alsoresize",{width:parseInt(k.width(),10),height:parseInt(k.height(),10),left:parseInt(k.css("left"),10),top:parseInt(k.css("top"),10)})})};if(typeof(i.alsoResize)=="object"&&!i.alsoResize.parentNode){if(i.alsoResize.length){i.alsoResize=i.alsoResize[0];h(i.alsoResize)}else{c.each(i.alsoResize,function(j){h(j)})}}else{h(i.alsoResize)}},resize:function(g,i){var f=c(this).data("resizable"),j=f.options,h=f.originalSize,l=f.originalPosition;var k={height:(f.size.height-h.height)||0,width:(f.size.width-h.width)||0,top:(f.position.top-l.top)||0,left:(f.position.left-l.left)||0},e=function(m,n){c(m).each(function(){var q=c(this),r=c(this).data("resizable-alsoresize"),p={},o=n&&n.length?n:q.parents(i.originalElement[0]).length?["width","height"]:["width","height","top","left"];c.each(o,function(s,u){var t=(r[u]||0)+(k[u]||0);if(t&&t>=0){p[u]=t||null}});q.css(p)})};if(typeof(j.alsoResize)=="object"&&!j.alsoResize.nodeType){c.each(j.alsoResize,function(m,n){e(m,n)})}else{e(j.alsoResize)}},stop:function(e,f){c(this).removeData("resizable-alsoresize")}});c.ui.plugin.add("resizable","animate",{stop:function(i,n){var p=c(this).data("resizable"),j=p.options;var h=p._proportionallyResizeElements,e=h.length&&(/textarea/i).test(h[0].nodeName),f=e&&c.ui.hasScroll(h[0],"left")?0:p.sizeDiff.height,l=e?0:p.sizeDiff.width;var g={width:(p.size.width-l),height:(p.size.height-f)},k=(parseInt(p.element.css("left"),10)+(p.position.left-p.originalPosition.left))||null,m=(parseInt(p.element.css("top"),10)+(p.position.top-p.originalPosition.top))||null;p.element.animate(c.extend(g,m&&k?{top:m,left:k}:{}),{duration:j.animateDuration,easing:j.animateEasing,step:function(){var 
o={width:parseInt(p.element.css("width"),10),height:parseInt(p.element.css("height"),10),top:parseInt(p.element.css("top"),10),left:parseInt(p.element.css("left"),10)};if(h&&h.length){c(h[0]).css({width:o.width,height:o.height})}p._updateCache(o);p._propagate("resize",i)}})}});c.ui.plugin.add("resizable","containment",{start:function(f,r){var t=c(this).data("resizable"),j=t.options,l=t.element;var g=j.containment,k=(g instanceof c)?g.get(0):(/parent/.test(g))?l.parent().get(0):g;if(!k){return}t.containerElement=c(k);if(/document/.test(g)||g==document){t.containerOffset={left:0,top:0};t.containerPosition={left:0,top:0};t.parentData={element:c(document),left:0,top:0,width:c(document).width(),height:c(document).height()||document.body.parentNode.scrollHeight}}else{var n=c(k),i=[];c(["Top","Right","Left","Bottom"]).each(function(p,o){i[p]=b(n.css("padding"+o))});t.containerOffset=n.offset();t.containerPosition=n.position();t.containerSize={height:(n.innerHeight()-i[3]),width:(n.innerWidth()-i[1])};var q=t.containerOffset,e=t.containerSize.height,m=t.containerSize.width,h=(c.ui.hasScroll(k,"left")?k.scrollWidth:m),s=(c.ui.hasScroll(k)?k.scrollHeight:e);t.parentData={element:k,left:q.left,top:q.top,width:h,height:s}}},resize:function(g,q){var t=c(this).data("resizable"),i=t.options,f=t.containerSize,p=t.containerOffset,m=t.size,n=t.position,r=t._aspectRatio||g.shiftKey,e={top:0,left:0},h=t.containerElement;if(h[0]!=document&&(/static/).test(h.css("position"))){e=p}if(n.left<(t._helper?p.left:0)){t.size.width=t.size.width+(t._helper?(t.position.left-p.left):(t.position.left-e.left));if(r){t.size.height=t.size.width/i.aspectRatio}t.position.left=i.helper?p.left:0}if(n.top<(t._helper?p.top:0)){t.size.height=t.size.height+(t._helper?(t.position.top-p.top):t.position.top);if(r){t.size.width=t.size.height*i.aspectRatio}t.position.top=t._helper?p.top:0}t.offset.left=t.parentData.left+t.position.left;t.offset.top=t.parentData.top+t.position.top;var 
l=Math.abs((t._helper?t.offset.left-e.left:(t.offset.left-e.left))+t.sizeDiff.width),s=Math.abs((t._helper?t.offset.top-e.top:(t.offset.top-p.top))+t.sizeDiff.height);var k=t.containerElement.get(0)==t.element.parent().get(0),j=/relative|absolute/.test(t.containerElement.css("position"));if(k&&j){l-=t.parentData.left}if(l+t.size.width>=t.parentData.width){t.size.width=t.parentData.width-l;if(r){t.size.height=t.size.width/t.aspectRatio}}if(s+t.size.height>=t.parentData.height){t.size.height=t.parentData.height-s;if(r){t.size.width=t.size.height*t.aspectRatio}}},stop:function(f,n){var q=c(this).data("resizable"),g=q.options,l=q.position,m=q.containerOffset,e=q.containerPosition,i=q.containerElement;var j=c(q.helper),r=j.offset(),p=j.outerWidth()-q.sizeDiff.width,k=j.outerHeight()-q.sizeDiff.height;if(q._helper&&!g.animate&&(/relative/).test(i.css("position"))){c(this).css({left:r.left-e.left-m.left,width:p,height:k})}if(q._helper&&!g.animate&&(/static/).test(i.css("position"))){c(this).css({left:r.left-e.left-m.left,width:p,height:k})}}});c.ui.plugin.add("resizable","ghost",{start:function(g,h){var e=c(this).data("resizable"),i=e.options,f=e.size;e.ghost=e.originalElement.clone();e.ghost.css({opacity:0.25,display:"block",position:"relative",height:f.height,width:f.width,margin:0,left:0,top:0}).addClass("ui-resizable-ghost").addClass(typeof i.ghost=="string"?i.ghost:"");e.ghost.appendTo(e.helper)},resize:function(f,g){var e=c(this).data("resizable"),h=e.options;if(e.ghost){e.ghost.css({position:"relative",height:e.size.height,width:e.size.width})}},stop:function(f,g){var e=c(this).data("resizable"),h=e.options;if(e.ghost&&e.helper){e.helper.get(0).removeChild(e.ghost.get(0))}}});c.ui.plugin.add("resizable","grid",{resize:function(e,m){var p=c(this).data("resizable"),h=p.options,k=p.size,i=p.originalSize,j=p.originalPosition,n=p.axis,l=h._aspectRatio||e.shiftKey;h.grid=typeof h.grid=="number"?[h.grid,h.grid]:h.grid;var 
g=Math.round((k.width-i.width)/(h.grid[0]||1))*(h.grid[0]||1),f=Math.round((k.height-i.height)/(h.grid[1]||1))*(h.grid[1]||1);if(/^(se|s|e)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f}else{if(/^(ne)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f;p.position.top=j.top-f}else{if(/^(sw)$/.test(n)){p.size.width=i.width+g;p.size.height=i.height+f;p.position.left=j.left-g}else{p.size.width=i.width+g;p.size.height=i.height+f;p.position.top=j.top-f;p.position.left=j.left-g}}}}});var b=function(e){return parseInt(e,10)||0};var a=function(e){return !isNaN(parseInt(e,10))}})(jQuery);/*! + * jQuery hashchange event - v1.3 - 7/21/2010 + * http://benalman.com/projects/jquery-hashchange-plugin/ + * + * Copyright (c) 2010 "Cowboy" Ben Alman + * Dual licensed under the MIT and GPL licenses. + * http://benalman.com/about/license/ + */ +(function($,e,b){var c="hashchange",h=document,f,g=$.event.special,i=h.documentMode,d="on"+c in e&&(i===b||i>7);function a(j){j=j||location.href;return"#"+j.replace(/^[^#]*#?(.*)$/,"$1")}$.fn[c]=function(j){return j?this.bind(c,j):this.trigger(c)};$.fn[c].delay=50;g[c]=$.extend(g[c],{setup:function(){if(d){return false}$(f.start)},teardown:function(){if(d){return false}$(f.stop)}});f=(function(){var j={},p,m=a(),k=function(q){return q},l=k,o=k;j.start=function(){p||n()};j.stop=function(){p&&clearTimeout(p);p=b};function n(){var r=a(),q=o(m);if(r!==m){l(m=r,q);$(e).trigger(c)}else{if(q!==m){location.href=location.href.replace(/#.*/,"")+q}}p=setTimeout(n,$.fn[c].delay)}$.browser.msie&&!d&&(function(){var q,r;j.start=function(){if(!q){r=$.fn[c].src;r=r&&r+a();q=$(' + + + + +
+ +
+
linear_scaling.h File Reference
+
+
+ +

Implements the BLAS linear scaling function alpha*AB + beta*C. +More...

+ +

Go to the source code of this file.

+
+ + + + + + + +

+Classes

struct  cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >
 Functor to compute linear combination of fragments. More...
 
struct  cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params
 The parameters. More...
 
+ + + + + +

+Namespaces

 cutlass
 
 cutlass::gemm
 
+ + + + + diff --git a/docs/generated-html/linear__scaling_8h_source.html b/docs/generated-html/linear__scaling_8h_source.html new file mode 100644 index 0000000000..d9817ed095 --- /dev/null +++ b/docs/generated-html/linear__scaling_8h_source.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: linear_scaling.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
linear_scaling.h
+
+
+Go to the documentation of this file.
1 
2 /***************************************************************************************************
3  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without modification, are permitted
6  * provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright notice, this list of
8  * conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright notice, this list of
10  * conditions and the following disclaimer in the documentation and/or other materials
11  * provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
13  * to endorse or promote products derived from this software without specific prior written
14  * permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
18  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  **************************************************************************************************/
29 #pragma once
30 
32 
33 namespace cutlass {
34 namespace gemm {
35 
37 
39 template <typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_> >
40 struct LinearScaling {
41  // The scalar.
42  typedef Scalar_ Scalar;
43  // The adapter.
44  typedef FragmentMultiplyAdd_ FragmentMultiplyAdd;
45 
47  struct Params {
50 
52  template <typename GemmDesc_>
53  CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const& desc) {
54  alpha = desc.alpha;
55  beta = desc.beta;
56  return 0;
57  }
58  };
59 
61  CUTLASS_DEVICE LinearScaling(Params const& params) : alpha(params.alpha), beta(params.beta) {}
62 
64  template <typename Fragment_>
65  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_& output) {
67  mad.multiply(alpha, accum, output);
68  }
69 
71  template <typename Fragment_>
72  CUTLASS_DEVICE void evaluate(Fragment_ const& accum, Fragment_ const& old, Fragment_& output) {
74  Fragment_ tmp;
75  mad.multiply(beta, old, tmp);
76  mad.multiply_add(alpha, accum, tmp, output);
77  }
78 
81 };
82 
84 
85 } // namespace gemm
86 } // namespace cutlass
Definition: convert.h:33
+
Scalar alpha
The alpha/beta scaling params.
Definition: linear_scaling.h:49
+
Scalar alpha
The alpha/beta scaling factors.
Definition: linear_scaling.h:80
+
CUTLASS_DEVICE LinearScaling(Params const &params)
Ctor.
Definition: linear_scaling.h:61
+
CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)
Evaluate the functor.
Definition: linear_scaling.h:72
+
Scalar beta
Definition: linear_scaling.h:49
+
CUTLASS_HOST_DEVICE int initialize(GemmDesc_ const &desc)
Initialize the parameters.
Definition: linear_scaling.h:53
+
Scalar beta
Definition: linear_scaling.h:80
+
Defines multiply-add operations on fragments within a thread.
+
FragmentMultiplyAdd_ FragmentMultiplyAdd
Definition: linear_scaling.h:44
+
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
+
CUTLASS_DEVICE void evaluate(Fragment_ const &accum, Fragment_ &output)
Evaluate the functor.
Definition: linear_scaling.h:65
+
The parameters.
Definition: linear_scaling.h:47
+
Functor to compute linear combination of fragments.
Definition: linear_scaling.h:40
+
Scalar_ Scalar
Definition: linear_scaling.h:42
+
+ + + + diff --git a/docs/generated-html/load__store_8h.html b/docs/generated-html/load__store_8h.html new file mode 100644 index 0000000000..b23ec3cbff --- /dev/null +++ b/docs/generated-html/load__store_8h.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: load_store.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
load_store.h File Reference
+
+ + + + + diff --git a/docs/generated-html/load__store_8h_source.html b/docs/generated-html/load__store_8h_source.html new file mode 100644 index 0000000000..e421cbf273 --- /dev/null +++ b/docs/generated-html/load__store_8h_source.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: load_store.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
load_store.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include <cutlass/vector.h>
31 
32 namespace cutlass {
33 
35 
39 struct MemorySpace {
40  enum Kind {
41  kGeneric, // Data accessed through pointer dereferencing
42  kShared, // Data resides in shared memory
43  kGlobal // Data resides in global memory
44  };
45 };
46 
48 
49 template <typename Scalar_,
50  int Lanes_,
51  MemorySpace::Kind Memory_,
52  bool = (Lanes_ > 1),
53  size_t = (sizeof(Scalar_) * Lanes_)>
54 struct Load {
57 
59  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
60  dst = reinterpret_cast<AccessType const*>(&pointer[offset])[0];
61  }
62 };
63 
65 
66 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
67 struct Load<Scalar_, Lanes_, Memory_, true, 4> {
70 
72  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
73  dst.registers[0] = reinterpret_cast<uint32_t const*>(&pointer[offset])[0];
74  }
75 };
76 
78 
79 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
80 struct Load<Scalar_, Lanes_, Memory_, true, 8> {
83 
85  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
86  uint2 tmp = reinterpret_cast<uint2 const*>(&pointer[offset])[0];
87  dst.registers[0] = tmp.x;
88  dst.registers[1] = tmp.y;
89  }
90 };
91 
93 
94 template <MemorySpace::Kind Memory_>
95 struct Load<double, 2, Memory_, true, 16> {
98 
100  static CUTLASS_DEVICE void load(AccessType& dst, double const* pointer, int offset) {
101  double2 tmp = reinterpret_cast<double2 const*>(&pointer[offset])[0];
102  dst[0] = tmp.x;
103  dst[1] = tmp.y;
104  }
105 };
106 
108 
109 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
110 struct Load<Scalar_, Lanes_, Memory_, true, 16> {
113 
115  static CUTLASS_DEVICE void load(AccessType& dst, Scalar_ const* pointer, int offset) {
116  uint4 tmp = reinterpret_cast<uint4 const*>(&pointer[offset])[0];
117  dst.registers[0] = tmp.x;
118  dst.registers[1] = tmp.y;
119  dst.registers[2] = tmp.z;
120  dst.registers[3] = tmp.w;
121  }
122 };
123 
125 
126 template <typename Scalar_,
127  int Lanes_,
128  MemorySpace::Kind Memory_,
129  bool = (Lanes_ > 1),
130  size_t = (sizeof(Scalar_) * Lanes_)>
131 struct Store {
134 
136  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
137  pointer[offset] = src;
138  }
139 };
140 
142 
143 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
144 struct Store<Scalar_, Lanes_, Memory_, true, 4> {
147 
149  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
150  uint32_t* addr = reinterpret_cast<uint32_t*>(&pointer[offset]);
151  addr[0] = src.registers[0];
152  }
153 };
154 
156 
157 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
158 struct Store<Scalar_, Lanes_, Memory_, true, 8> {
161 
163  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
164  uint2* addr = reinterpret_cast<uint2*>(&pointer[offset]);
165  addr[0] = make_uint2(src.registers[0], src.registers[1]);
166  }
167 };
168 
170 
171 template <MemorySpace::Kind Memory_>
172 struct Store<double, 2, Memory_, true, 16> {
175 
177  static CUTLASS_DEVICE void store(AccessType const& src, double* pointer, int offset) {
178  double2* addr = reinterpret_cast<double2*>(&pointer[offset]);
179  addr[0] = make_double2(src[0], src[1]);
180  }
181 };
182 
184 
185 template <typename Scalar_, int Lanes_, MemorySpace::Kind Memory_>
186 struct Store<Scalar_, Lanes_, Memory_, true, 16> {
189 
191  static CUTLASS_DEVICE void store(AccessType const& src, Scalar_* pointer, int offset) {
192  uint4* addr = reinterpret_cast<uint4*>(&pointer[offset]);
193  addr[0] = make_uint4(src.registers[0], src.registers[1], src.registers[2], src.registers[3]);
194  }
195 };
196 
198 
199 } // namespace cutlass
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:188
+
Definition: load_store.h:42
+
Definition: convert.h:33
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:163
+
Enum to specify which memory space data resides in.
Definition: load_store.h:39
+
Definition: load_store.h:43
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:59
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:112
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:146
+
Kind
Definition: load_store.h:40
+
Definition: load_store.h:131
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:136
+
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:80
+
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:174
+
Definition: load_store.h:41
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:72
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:133
+
Definition: vector.h:61
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:85
+
Definition: load_store.h:54
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:82
+
Defines a 1D vector of elements held in the registers of each thread.
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:160
+
static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:115
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:69
+
static CUTLASS_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The load function.
Definition: load_store.h:100
+
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:97
+
Vectorize< Scalar_, Lanes_ >::Type AccessType
The output type.
Definition: load_store.h:56
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:191
+
static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:149
+
static CUTLASS_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function.
Definition: load_store.h:177
+
+ + + + diff --git a/docs/generated-html/matrix__traits_8h.html b/docs/generated-html/matrix__traits_8h.html new file mode 100644 index 0000000000..f83c89f0df --- /dev/null +++ b/docs/generated-html/matrix__traits_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: matrix_traits.h File Reference + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+ +
+
matrix_traits.h File Reference
+
+
+ +

Defines properties of matrices used to denote layout and operands to GEMM kernels. +More...

+ +

Go to the source code of this file.

+ + + + + + + + +

+Classes

struct  cutlass::MatrixLayout
 Describes layouts of matrices. More...
 
struct  cutlass::GemmOperand
 Gemm operand - D = A * B + C. More...
 
+ + + +

+Namespaces

 cutlass
 
+
+ + + + diff --git a/docs/generated-html/matrix__traits_8h_source.html b/docs/generated-html/matrix__traits_8h_source.html new file mode 100644 index 0000000000..9f8de2dc64 --- /dev/null +++ b/docs/generated-html/matrix__traits_8h_source.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: matrix_traits.h Source File + + + + + + + + + + +
+
+ + + + + + +
+
Cutlass +
+
CUDA Templates for Linear Algebra Subroutines and Solvers
+
+
+ + + + + + + + +
+
+ + +
+ +
+ + +
+
+
+
matrix_traits.h
+
+
+Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 namespace cutlass {
31 
33 
35 struct MatrixLayout {
37 };
38 
40 
42 struct GemmOperand {
43  enum Kind { kA, kB, kC, kD };
44 };
45 
47 
48 } // namespace cutlass
Definition: convert.h:33
+
Definition: matrix_traits.h:43
+
Describes layouts of matrices.
Definition: matrix_traits.h:35
+
Definition: matrix_traits.h:36
+
Definition: matrix_traits.h:43
+
Gemm operand - D = A * B + C.
Definition: matrix_traits.h:42
+
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:36
+
Kind
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
Definition: matrix_traits.h:43
+
+ + + + diff --git a/docs/generated-html/menu.js b/docs/generated-html/menu.js new file mode 100644 index 0000000000..89aaf575c6 --- /dev/null +++ b/docs/generated-html/menu.js @@ -0,0 +1,50 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function initMenu(relPath,searchEnabled,serverSide,searchPage,search) { + function makeTree(data,relPath) { + var result=''; + if ('children' in data) { + result+=''; + } + return result; + } + + $('#main-nav').append(makeTree(menudata,relPath)); + $('#main-nav').children(':first').addClass('sm sm-dox').attr('id','main-menu'); + if (searchEnabled) { + if (serverSide) { + $('#main-menu').append('
  • '); + } else { + $('#main-menu').append('
  • '); + } + } + $('#main-menu').smartmenus(); +} +/* @license-end */ diff --git a/docs/generated-html/menudata.js b/docs/generated-html/menudata.js new file mode 100644 index 0000000000..725988aa85 --- /dev/null +++ b/docs/generated-html/menudata.js @@ -0,0 +1,151 @@ +/* +@ @licstart The following is the entire license notice for the +JavaScript code in this file. + +Copyright (C) 1997-2017 by Dimitri van Heesch + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +@licend The above is the entire license notice +for the JavaScript code in this file +*/ +var menudata={children:[ +{text:"Main Page",url:"index.html"}, +{text:"Modules",url:"modules.html"}, +{text:"Namespaces",url:"namespaces.html",children:[ +{text:"Namespace List",url:"namespaces.html"}, +{text:"Namespace Members",url:"namespacemembers.html",children:[ +{text:"All",url:"namespacemembers.html",children:[ +{text:"_",url:"namespacemembers.html#index__"}, +{text:"c",url:"namespacemembers.html#index_c"}, +{text:"f",url:"namespacemembers.html#index_f"}, +{text:"g",url:"namespacemembers.html#index_g"}, +{text:"i",url:"namespacemembers.html#index_i"}, +{text:"l",url:"namespacemembers.html#index_l"}, +{text:"m",url:"namespacemembers.html#index_m"}, +{text:"o",url:"namespacemembers.html#index_o"}, +{text:"r",url:"namespacemembers.html#index_r"}, +{text:"s",url:"namespacemembers.html#index_s"}, +{text:"t",url:"namespacemembers.html#index_t"}]}, +{text:"Functions",url:"namespacemembers_func.html",children:[ +{text:"_",url:"namespacemembers_func.html#index__"}, +{text:"c",url:"namespacemembers_func.html#index_c"}, +{text:"g",url:"namespacemembers_func.html#index_g"}, +{text:"i",url:"namespacemembers_func.html#index_i"}, +{text:"l",url:"namespacemembers_func.html#index_l"}, +{text:"m",url:"namespacemembers_func.html#index_m"}, +{text:"o",url:"namespacemembers_func.html#index_o"}, +{text:"r",url:"namespacemembers_func.html#index_r"}, +{text:"s",url:"namespacemembers_func.html#index_s"}]}, +{text:"Typedefs",url:"namespacemembers_type.html"}]}]}, +{text:"Classes",url:"annotated.html",children:[ +{text:"Class List",url:"annotated.html"}, +{text:"Class Index",url:"classes.html"}, +{text:"Class Hierarchy",url:"hierarchy.html"}, +{text:"Class Members",url:"functions.html",children:[ +{text:"All",url:"functions.html",children:[ +{text:"a",url:"functions.html#index_a"}, +{text:"b",url:"functions_b.html#index_b"}, +{text:"c",url:"functions_c.html#index_c"}, 
+{text:"d",url:"functions_d.html#index_d"}, +{text:"e",url:"functions_e.html#index_e"}, +{text:"f",url:"functions_f.html#index_f"}, +{text:"g",url:"functions_g.html#index_g"}, +{text:"h",url:"functions_h.html#index_h"}, +{text:"i",url:"functions_i.html#index_i"}, +{text:"k",url:"functions_k.html#index_k"}, +{text:"l",url:"functions_l.html#index_l"}, +{text:"m",url:"functions_m.html#index_m"}, +{text:"n",url:"functions_n.html#index_n"}, +{text:"o",url:"functions_o.html#index_o"}, +{text:"p",url:"functions_p.html#index_p"}, +{text:"r",url:"functions_r.html#index_r"}, +{text:"s",url:"functions_s.html#index_s"}, +{text:"t",url:"functions_t.html#index_t"}, +{text:"u",url:"functions_u.html#index_u"}, +{text:"v",url:"functions_v.html#index_v"}, +{text:"w",url:"functions_w.html#index_w"}, +{text:"y",url:"functions_y.html#index_y"}, +{text:"~",url:"functions_0x7e.html#index_0x7e"}]}, +{text:"Functions",url:"functions_func.html",children:[ +{text:"a",url:"functions_func.html#index_a"}, +{text:"b",url:"functions_func_b.html#index_b"}, +{text:"c",url:"functions_func_c.html#index_c"}, +{text:"d",url:"functions_func_d.html#index_d"}, +{text:"e",url:"functions_func_e.html#index_e"}, +{text:"f",url:"functions_func_f.html#index_f"}, +{text:"g",url:"functions_func_g.html#index_g"}, +{text:"h",url:"functions_func_h.html#index_h"}, +{text:"i",url:"functions_func_i.html#index_i"}, +{text:"l",url:"functions_func_l.html#index_l"}, +{text:"m",url:"functions_func_m.html#index_m"}, +{text:"o",url:"functions_func_o.html#index_o"}, +{text:"p",url:"functions_func_p.html#index_p"}, +{text:"r",url:"functions_func_r.html#index_r"}, +{text:"s",url:"functions_func_s.html#index_s"}, +{text:"t",url:"functions_func_t.html#index_t"}, +{text:"u",url:"functions_func_u.html#index_u"}, +{text:"v",url:"functions_func_v.html#index_v"}, +{text:"w",url:"functions_func_w.html#index_w"}, +{text:"~",url:"functions_func_0x7e.html#index_0x7e"}]}, +{text:"Variables",url:"functions_vars.html",children:[ 
+{text:"a",url:"functions_vars.html#index_a"}, +{text:"b",url:"functions_vars_b.html#index_b"}, +{text:"c",url:"functions_vars_c.html#index_c"}, +{text:"d",url:"functions_vars_d.html#index_d"}, +{text:"e",url:"functions_vars_e.html#index_e"}, +{text:"f",url:"functions_vars_f.html#index_f"}, +{text:"g",url:"functions_vars_g.html#index_g"}, +{text:"i",url:"functions_vars_i.html#index_i"}, +{text:"k",url:"functions_vars_k.html#index_k"}, +{text:"l",url:"functions_vars_l.html#index_l"}, +{text:"m",url:"functions_vars_m.html#index_m"}, +{text:"n",url:"functions_vars_n.html#index_n"}, +{text:"p",url:"functions_vars_p.html#index_p"}, +{text:"r",url:"functions_vars_r.html#index_r"}, +{text:"s",url:"functions_vars_s.html#index_s"}, +{text:"t",url:"functions_vars_t.html#index_t"}, +{text:"v",url:"functions_vars_v.html#index_v"}]}, +{text:"Typedefs",url:"functions_type.html",children:[ +{text:"a",url:"functions_type.html#index_a"}, +{text:"b",url:"functions_type_b.html#index_b"}, +{text:"c",url:"functions_type_c.html#index_c"}, +{text:"d",url:"functions_type_d.html#index_d"}, +{text:"e",url:"functions_type_e.html#index_e"}, +{text:"f",url:"functions_type_f.html#index_f"}, +{text:"g",url:"functions_type_g.html#index_g"}, +{text:"i",url:"functions_type_i.html#index_i"}, +{text:"l",url:"functions_type_l.html#index_l"}, +{text:"m",url:"functions_type_m.html#index_m"}, +{text:"n",url:"functions_type_n.html#index_n"}, +{text:"o",url:"functions_type_o.html#index_o"}, +{text:"p",url:"functions_type_p.html#index_p"}, +{text:"s",url:"functions_type_s.html#index_s"}, +{text:"t",url:"functions_type_t.html#index_t"}, +{text:"v",url:"functions_type_v.html#index_v"}, +{text:"w",url:"functions_type_w.html#index_w"}, +{text:"y",url:"functions_type_y.html#index_y"}]}, +{text:"Enumerations",url:"functions_enum.html"}, +{text:"Enumerator",url:"functions_eval.html",children:[ +{text:"a",url:"functions_eval.html#index_a"}, +{text:"k",url:"functions_eval.html#index_k"}, 
+{text:"m",url:"functions_eval.html#index_m"}, +{text:"v",url:"functions_eval.html#index_v"}]}]}]}, +{text:"Files",url:"files.html",children:[ +{text:"File List",url:"files.html"}, +{text:"File Members",url:"globals.html",children:[ +{text:"All",url:"globals.html"}, +{text:"Functions",url:"globals_func.html"}, +{text:"Macros",url:"globals_defs.html"}]}]}]} diff --git a/docs/generated-html/modules.html b/docs/generated-html/modules.html new file mode 100644 index 0000000000..c42247bd4f --- /dev/null +++ b/docs/generated-html/modules.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Modules + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    +
    Modules
    +
    + + + + + diff --git a/docs/generated-html/namespacecutlass.html b/docs/generated-html/namespacecutlass.html new file mode 100644 index 0000000000..989135cbaf --- /dev/null +++ b/docs/generated-html/namespacecutlass.html @@ -0,0 +1,1557 @@ + + + + + + + +Cutlass: cutlass Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + +
    +
    + +
    +
    cutlass Namespace Reference
    +
    +
    + + + + + + +

    +Namespaces

     gemm
     
     platform
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  AlignedStruct
     
    struct  ComputeOffsetFromShape
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeOffsetFromStrides
     Compute the offset for the given coordinates in a cube. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  ComputeThreadOffsetFromStrides
     Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_, then compute the offset of those coordinates using Strides_. More...
     
    struct  ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >
     Specialization for D=1 and C=1. More...
     
    struct  ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >
     Specialization for D=1. More...
     
    struct  ConstPredicateTileAdapter
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  Convert
     
    struct  Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >
     
    struct  Coord
     Statically-sized array specifying Coords within a tensor. More...
     
    struct  Copy
     
    struct  divide_assert
     
    struct  Extent
     Returns the extent of a scalar or vector. More...
     
    struct  Extent< Vector< T, Lanes > >
     Returns the number of lanes of a vector if need be. More...
     
    struct  Extent< Vector< T, Lanes > const >
     Returns the number of lanes of a vector if need be. More...
     
    struct  Fragment
     A template defining Fragment Concept. More...
     
    struct  FragmentConstIterator
     
    struct  FragmentIterator
     A template defining Fragment Iterator Concept. More...
     
    struct  FragmentLoad
     
    struct  FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore
     
    struct  FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >
     
    struct  GemmOperand
     Gemm operand - D = A * B + C. More...
     
    struct  Identity
     Describes identity elements. More...
     
    struct  is_pow2
     
    struct  IteratorAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    struct  IteratorFragment
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  Load
     
    struct  Load< double, 2, Memory_, true, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 16 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 4 >
     
    struct  Load< Scalar_, Lanes_, Memory_, true, 8 >
     
    struct  log2_down
     
    struct  log2_down< N, 1, Count >
     
    struct  log2_up
     
    struct  log2_up< N, 1, Count >
     
    struct  MatrixLayout
     Describes layouts of matrices. More...
     
    struct  MemorySpace
     Enum to specify which memory space data resides in. More...
     
    struct  PredicateTileAdapter
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  PredicateVector
     Statically sized array of bits implementing the Predicate Vector Concept. More...
     
    struct  ReshapeTile
     
    struct  ReshapeTile< Tile_, kAccessSize_, true >
     
    struct  Shape
     A Shape implementing Layout Concept describing the dimensions of a cube. More...
     
    struct  ShapeAdd
     
    struct  ShapeCount
     Computes derived counts of a Layout Concept based class. More...
     
    struct  ShapeDiv
     
    struct  ShapeMax
     
    struct  ShapeMin
     
    struct  ShapeMul
     
    struct  ShapeScale
     
    struct  ShapeStrides
     
    struct  ShapeSub
     
    struct  sqrt_est
     
    struct  StorageType
     
    struct  StorageType< 1 >
     
    struct  StorageType< 2 >
     
    struct  StorageType< 4 >
     
    struct  Store
     
    struct  Store< double, 2, Memory_, true, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 16 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 4 >
     
    struct  Store< Scalar_, Lanes_, Memory_, true, 8 >
     
    class  TensorRef
     Structure modeling a pointer and stride into a tensor. More...
     
    class  TensorView
     Host-side reference implementation of tensor operations. More...
     
    struct  TiledThreadOffset
     Basic thread offset function computed from a thread shape. More...
     
    struct  TileIteratorBase
     Iterator for accessing a stripmined tile in memory. More...
     
    struct  TileLoadIterator
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    struct  TileStoreIterator
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    struct  TileTraits
     A template defining Tile Traits Concept. More...
     
    struct  TileTraitsContiguousMajor
     
    struct  TileTraitsStandard
     Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
     
    struct  TileTraitsStrideMajor
     
    struct  TileTraitsWarpRake
     Tiling in which warps rake across the contiguous dimension. More...
     
    struct  TrivialPredicateTileAdapter
     Always returns true predicate. More...
     
    union  Vector
     
    union  Vector< half, kLanes_ >
     
    struct  Vectorize
     
    struct  Vectorize< Element_, 1 >
     
    struct  VectorTraits
     Traits describing properties of vectors and scalar-as-vectors. More...
     
    struct  VectorTraits< Vector< T, Lanes > >
     Partial specialization for actual cutlass::Vector. More...
     
    struct  VectorTraits< Vector< T, Lanes > const >
     Partial specialization for actual cutlass::Vector. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord (int _0)
     Helper to make a 1-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > make_Coord (int _0, int _1)
     Helper to make a 2-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > make_Coord (int _0, int _1, int _2)
     Helper to make a 3-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 4 > make_Coord (int _0, int _1, int _2, int _3)
     Helper to make a 4-element coordinate. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 3 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 2 > get_Coord_hw (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_hwc (Coord< 4 > const &coord)
     Getter. More...
     
    CUTLASS_HOST_DEVICE Coord< 3 > get_Coord_dhw (Coord< 4 > const &coord)
     Getter. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_load (InputIterator &iterator, Fragment &fragment, int d)
     Loads a fragment from a shared memory input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Loads a fragment from an input iterator, masked by a predicate iterator. More...
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load_post_increment (InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     
    template<typename InputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)
     Loads a fragment from an input iterator. More...
     
    template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_load (InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)
     Loads a fragment from an input iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator &iterator, Fragment &fragment)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_DEVICE void shared_iterator_store (OutputIterator &iterator, Fragment const &fragment)
     Stores a fragment to a shared memory output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store_post_increment (OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)
     Stores a fragment to an output iterator, masked by a predicate iterator. More...
     
    template<typename OutputIterator , typename Fragment >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)
     Stores a fragment to an output iterator. More...
     
    template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    CUTLASS_HOST_DEVICE void iterator_store (OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)
     Stores a fragment to an output iterator. More...
     
    template<typename dividend_t , typename divisor_t >
    CUTLASS_HOST_DEVICE dividend_t round_nearest (dividend_t dividend, divisor_t divisor)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t gcd (value_t a, value_t b)
     
    template<typename value_t >
    CUTLASS_HOST_DEVICE value_t lcm (value_t a, value_t b)
     
    __host__ CUTLASS_DEVICE cudaError_t cuda_perror_impl (cudaError_t error, const char *filename, int line)
     The corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context. More...
     
    template<>
    struct __align__ (1) AlignedStruct< 1 >
     
    template<>
    struct __align__ (2) AlignedStruct< 2 >
     
    template<>
    struct __align__ (4) AlignedStruct< 4 >
     
    template<>
    struct __align__ (8) AlignedStruct< 8 >
     
    template<>
    struct __align__ (16) AlignedStruct< 16 >
     
    template<>
    struct __align__ (32) AlignedStruct< 32 >
     
    template<>
    struct __align__ (64) AlignedStruct< 64 >
     
    template<typename Scalar_ >
    CUTLASS_DEVICE void make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    +

    Function Documentation

    + +

    ◆ __align__() [1/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [4/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (16 )
    +
    + +
    +
    + +

    ◆ __align__() [5/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [6/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [7/7]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::__align__ ()
    +
    + +
    +
    + +

    ◆ cuda_perror_impl()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    __host__ CUTLASS_DEVICE cudaError_t cutlass::cuda_perror_impl (cudaError_t error,
    const char * filename,
    int line 
    )
    +
    +
    Returns
    The CUDA error.
    + +
    +
    + +

    ◆ gcd()

    + +
    +
    +
    +template<typename value_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE value_t cutlass::gcd (value_t a,
    value_t b 
    )
    +
    +

    Greatest common divisor

    + +
    +
    + +

    ◆ get_Coord_dhw()

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_dhw (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hw() [1/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 3 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hw() [2/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::get_Coord_hw (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ get_Coord_hwc()

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::get_Coord_hwc (Coord< 4 > const & coord)
    +
    + +
    +
    + +

    ◆ iterator_load() [1/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [2/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & _iterator,
    Fragment & fragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [3/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_load() [4/4]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load (InputIterator const & iterator,
    Fragment & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [1/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [2/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    typename InputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_load_post_increment() [3/3]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_load_post_increment (InputIterator & iterator,
    Fragment & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [1/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [2/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & _iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [3/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_store() [4/4]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store (OutputIterator const & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [1/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset,
    ConstPredicateAdapter predicate_adapter 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [2/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    typename OutputIterator::Index offset = 0 
    )
    +
    + +
    +
    + +

    ◆ iterator_store_post_increment() [3/3]

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment , typename ConstPredicateAdapter >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::iterator_store_post_increment (OutputIterator & iterator,
    Fragment const & fragment,
    ConstPredicateAdapter pred_it 
    )
    +
    + +
    +
    + +

    ◆ lcm()

    + +
    +
    +
    +template<typename value_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE value_t cutlass::lcm (value_t a,
    value_t b 
    )
    +
    +

    Least common multiple

    + +
    +
    + +

    ◆ make_Coord() [1/4]

    + +
    +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<1> cutlass::make_Coord (int _0)
    +
    + +
    +
    + +

    ◆ make_Coord() [2/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<2> cutlass::make_Coord (int _0,
    int _1 
    )
    +
    + +
    +
    + +

    ◆ make_Coord() [3/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<3> cutlass::make_Coord (int _0,
    int _1,
    int _2 
    )
    +
    + +
    +
    + +

    ◆ make_Coord() [4/4]

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::make_Coord (int _0,
    int _1,
    int _2,
    int _3 
    )
    +
    + +
    +
    + +

    ◆ make_zero() [1/2]

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::make_zero (Scalar_ & x)
    +
    + +
    +
    + +

    ◆ make_zero() [2/2]

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_>
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::make_zero (Vector< Scalar_, kLanes_ > & vec)
    +
    + +
    +
    + +

    ◆ round_nearest()

    + +
    +
    +
    +template<typename dividend_t , typename divisor_t >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE dividend_t cutlass::round_nearest (dividend_t dividend,
    divisor_t divisor 
    )
    +
    +

    Round dividend up to the nearest multiple of divisor

    + +
    +
    + +

    ◆ shared_iterator_load() [1/2]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragment & fragment 
    )
    +
    + +
    +
    + +

    ◆ shared_iterator_load() [2/2]

    + +
    +
    +
    +template<typename InputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_load (InputIterator & iterator,
    Fragment & fragment,
    int d 
    )
    +
    + +
    +
    + +

    ◆ shared_iterator_store()

    + +
    +
    +
    +template<typename OutputIterator , typename Fragment >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::shared_iterator_store (OutputIterator & iterator,
    Fragment const & fragment 
    )
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacecutlass_1_1gemm.html b/docs/generated-html/namespacecutlass_1_1gemm.html new file mode 100644 index 0000000000..1c84e4480a --- /dev/null +++ b/docs/generated-html/namespacecutlass_1_1gemm.html @@ -0,0 +1,371 @@ + + + + + + + +Cutlass: cutlass::gemm Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  ClearAccumulators
     
    struct  DgemmConfig
     
    struct  DgemmTraits
     
    struct  FragmentMultiplyAdd
     
    struct  FragmentMultiplyAdd< half >
     
    struct  Gemm
     
    struct  GemmConfig
     
    struct  GemmDesc
     
    struct  GemmEpilogue
     
    struct  GemmEpilogueTraits
     
    struct  GemmEpilogueTraitsHelper
     
    struct  GemmGlobalIteratorAb
     
    struct  GemmGlobalIteratorCd
     
    struct  GemmGlobalTileCdTraits
     
    struct  GemmGlobalTileTraits
     
    struct  GemmMultiplicandTraits
     
    struct  GemmOperandTraitsAb
     Helper to describe attributes of GEMM matrix operands. More...
     
    struct  GemmSharedLoadTileATraits
     
    struct  GemmSharedLoadTileBTraits
     
    struct  GemmSharedLoadTileDTraits
     
    struct  GemmSharedStoreTileAbTraits
     
    struct  GemmSharedStoreTileDTraits
     
    struct  GemmSharedStoreWithSkewTileAbTraits
     
    struct  GemmTileTraitsHelperA
     
    struct  GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperB
     
    struct  GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  GemmTraits
     
    struct  GetExtent
     
    struct  GetExtent< GemmOperand::kA, Tile_ >
     
    struct  GetExtent< GemmOperand::kB, Tile_ >
     
    struct  GlobalLoadStream
     
    struct  GlobalLoadStreamBase
     
    struct  HgemmConfig
     
    struct  HgemmCrosswiseGlobalTileTraits
     
    struct  HgemmSwizzle
     
    struct  HgemmTileTraitsHelperA
     
    struct  HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  HgemmTileTraitsHelperB
     
    struct  HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  HgemmTraits
     
    struct  HgemmTraitsHelper
     
    struct  HgemmTransformerA
     
    struct  HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  HgemmTransformerB
     
    struct  HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  IdentityBlockSwizzle
     
    struct  IgemmConfig
     
    struct  IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >
     
    struct  IgemmContiguousGlobalTileTraits
     
    struct  IgemmEpilogue
     
    struct  IgemmEpilogue< GemmEpilogueTraits_, true >
     
    struct  IgemmEpilogueScalar
     
    struct  IgemmEpilogueScalar< int >
     
    struct  IgemmEpilogueTraits
     
    struct  IgemmEpilogueTraitsHelper
     
    struct  IgemmFloatToInt8Converter
     
    struct  IgemmGlobalLoadTransformer
     
    struct  IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >
     
    struct  IgemmGlobalStoreTransformer
     
    struct  IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >
     
    struct  IgemmInt8ToFloatConverter
     
    struct  IgemmSharedStoreTransformer
     
    struct  IgemmSwizzle
     
    struct  IgemmTileTraitsHelperA
     
    struct  IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
     
    struct  IgemmTileTraitsHelperB
     
    struct  IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
     
    struct  IgemmTraits
     
    struct  IgemmTraitsHelper
     
    struct  IgemmTransformerA
     
    struct  IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  IgemmTransformerB
     
    struct  IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >
     
    struct  IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >
     
    struct  LinearScaling
     Functor to compute linear combination of fragments. More...
     
    struct  ProjectOperand
     
    struct  ProjectOperand< GemmOperand::kA, Kstrided >
     Project A operand - (0, K, M) More...
     
    struct  ProjectOperand< GemmOperand::kB, Kstrided >
     Project B operand - (0, K, N) More...
     
    struct  ProjectOperand< GemmOperand::kC, true >
     Project C operand - (0, N, M) More...
     
    struct  ProjectOperand< GemmOperand::kD, true >
     Project D operand - (0, N, M) More...
     
    struct  ReshapeThreads
     
    struct  ReshapeThreads< Tile_, Threads_, true >
     
    struct  SgemmConfig
     
    struct  SgemmTraits
     
    struct  SharedLoadStream
     
    struct  SimplifiedGemmEpilogueTraits
     
    struct  SimplifiedGemmTraits
     
    struct  SimplifiedGemmTraitsHelper
     
    struct  ThreadMultiplyAdd
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
     Template performing matrix multiply-add operation within a thread. More...
     
    struct  WmmaGemmGlobalIteratorCd
     
    struct  WmmaGemmGlobalIteratorCdTraits
     
    + + + + + + + + + +

    +Functions

    template<typename Gemm_ >
    __global__ void gemm_kernel (typename Gemm_::Params params)
     
    template<typename T >
    CUTLASS_DEVICE bool is_zero (T x)
     
    CUTLASS_DEVICE bool is_zero (half x)
     
    +

    Function Documentation

    + +

    ◆ gemm_kernel()

    + +
    +
    +
    +template<typename Gemm_ >
    + + + + + + + + +
    __global__ void cutlass::gemm::gemm_kernel (typename Gemm_::Params params)
    +
    + +
    +
    + +

    ◆ is_zero() [1/2]

    + +
    +
    +
    +template<typename T >
    + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (T x)
    +
    + +
    +
    + +

    ◆ is_zero() [2/2]

    + +
    +
    + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::is_zero (half x)
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacecutlass_1_1platform.html b/docs/generated-html/namespacecutlass_1_1platform.html new file mode 100644 index 0000000000..2bf30c0df6 --- /dev/null +++ b/docs/generated-html/namespacecutlass_1_1platform.html @@ -0,0 +1,938 @@ + + + + + + + +Cutlass: cutlass::platform Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  aligned_chunk
     
    struct  aligned_storage
     std::aligned_storage More...
     
    struct  alignment_of
     std::alignment_of More...
     
    struct  alignment_of< const value_t >
     
    struct  alignment_of< const volatile value_t >
     
    struct  alignment_of< double2 >
     
    struct  alignment_of< double4 >
     
    struct  alignment_of< float4 >
     
    struct  alignment_of< int4 >
     
    struct  alignment_of< long4 >
     
    struct  alignment_of< longlong2 >
     
    struct  alignment_of< longlong4 >
     
    struct  alignment_of< uint4 >
     
    struct  alignment_of< ulong4 >
     
    struct  alignment_of< ulonglong2 >
     
    struct  alignment_of< ulonglong4 >
     
    struct  alignment_of< volatile value_t >
     
    struct  bool_constant
     std::bool_constant More...
     
    struct  conditional
     std::conditional (true specialization) More...
     
    struct  conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  default_delete
     Default deleter. More...
     
    struct  default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    struct  enable_if
     std::enable_if (true specialization) More...
     
    struct  enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  greater
     std::greater More...
     
    struct  integral_constant
     std::integral_constant More...
     
    struct  is_arithmetic
     std::is_arithmetic More...
     
    struct  is_base_of
     std::is_base_of More...
     
    struct  is_base_of_helper
     Helper for std::is_base_of. More...
     
    struct  is_floating_point
     std::is_floating_point More...
     
    struct  is_fundamental
     std::is_fundamental More...
     
    struct  is_integral
     std::is_integral More...
     
    struct  is_integral< char >
     
    struct  is_integral< const T >
     
    struct  is_integral< const volatile T >
     
    struct  is_integral< int >
     
    struct  is_integral< long >
     
    struct  is_integral< long long >
     
    struct  is_integral< short >
     
    struct  is_integral< signed char >
     
    struct  is_integral< unsigned char >
     
    struct  is_integral< unsigned int >
     
    struct  is_integral< unsigned long >
     
    struct  is_integral< unsigned long long >
     
    struct  is_integral< unsigned short >
     
    struct  is_integral< volatile T >
     
    struct  is_pointer
     std::is_pointer More...
     
    struct  is_pointer_helper
     Helper for std::is_pointer (false specialization) More...
     
    struct  is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  is_same
     std::is_same (false specialization) More...
     
    struct  is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  is_trivially_copyable
     
    struct  is_void
     std::is_void More...
     
    struct  is_volatile
     std::is_volatile More...
     
    struct  is_volatile< volatile T >
     
    struct  less
     std::less More...
     
    struct  nullptr_t
     std::nullptr_t More...
     
    struct  plus
     platform::plus More...
     
    struct  remove_const
     std::remove_const (non-const specialization) More...
     
    struct  remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  remove_cv
     std::remove_cv More...
     
    struct  remove_volatile
     std::remove_volatile (non-volatile specialization) More...
     
    struct  remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    class  unique_ptr
     std::unique_ptr More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair (T1 t, T2 u)
     
    template<>
    struct __align__ (1) aligned_chunk< 1 >
     
    template<>
    struct __align__ (2) aligned_chunk< 2 >
     
    template<>
    struct __align__ (4) aligned_chunk< 4 >
     
    template<>
    struct __align__ (8) aligned_chunk< 8 >
     
    template<>
    struct __align__ (16) aligned_chunk< 16 >
     
    template<>
    struct __align__ (32) aligned_chunk< 32 >
     
    template<>
    struct __align__ (64) aligned_chunk< 64 >
     
    template<>
    struct __align__ (128) aligned_chunk< 128 >
     
    template<>
    struct __align__ (256) aligned_chunk< 256 >
     
    template<>
    struct __align__ (512) aligned_chunk< 512 >
     
    template<>
    struct __align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct __align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct __align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Typedef Documentation

    + +

    ◆ false_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, false> cutlass::platform::false_type
    +
    + +
    +
    + +

    ◆ true_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, true> cutlass::platform::true_type
    +
    + +
    +
    +

    Function Documentation

    + +

    ◆ __align__() [1/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [4/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (128 )
    +
    + +
    +
    + +

    ◆ __align__() [5/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (256 )
    +
    + +
    +
    + +

    ◆ __align__() [6/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (512 )
    +
    + +
    +
    + +

    ◆ __align__() [7/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (1024 )
    +
    + +
    +
    + +

    ◆ __align__() [8/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (2048 )
    +
    + +
    +
    + +

    ◆ __align__() [9/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (4096 )
    +
    + +
    +
    + +

    ◆ __align__() [10/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [11/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [12/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [13/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct cutlass::platform::__align__ (16 )
    +
    + +
    +
    + +

    ◆ make_pair()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE std::pair<T1, T2> cutlass::platform::make_pair (T1 t,
    T2 u 
    )
    +
    + +
    +
    + +

    ◆ max()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& cutlass::platform::max (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ min()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& cutlass::platform::min (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator!= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator< (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator<= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator== (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator> (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator>= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ swap()

    + +
    +
    +
    +template<typename T , typename Deleter >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    void cutlass::platform::swap (unique_ptr< T, Deleter > & lhs,
    unique_ptr< T, Deleter > & rhs 
    )
    +
    +noexcept
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespacemembers.html b/docs/generated-html/namespacemembers.html new file mode 100644 index 0000000000..9566721d18 --- /dev/null +++ b/docs/generated-html/namespacemembers.html @@ -0,0 +1,214 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    Here is a list of all namespace members with links to the namespace documentation for each member:
    + +

    - _ -

    + + +

    - c -

      +
    • cuda_perror_impl() +: cutlass +
    • +
    + + +

    - f -

    + + +

    - g -

    + + +

    - i -

    + + +

    - l -

    + + +

    - m -

    + + +

    - o -

    + + +

    - r -

    + + +

    - s -

    + + +

    - t -

    +
    + + + + diff --git a/docs/generated-html/namespacemembers_func.html b/docs/generated-html/namespacemembers_func.html new file mode 100644 index 0000000000..2ba52b0ef9 --- /dev/null +++ b/docs/generated-html/namespacemembers_func.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +  + +

    - _ -

    + + +

    - c -

      +
    • cuda_perror_impl() +: cutlass +
    • +
    + + +

    - g -

    + + +

    - i -

    + + +

    - l -

    + + +

    - m -

    + + +

    - o -

    + + +

    - r -

    + + +

    - s -

    +
    + + + + diff --git a/docs/generated-html/namespacemembers_type.html b/docs/generated-html/namespacemembers_type.html new file mode 100644 index 0000000000..37618fba14 --- /dev/null +++ b/docs/generated-html/namespacemembers_type.html @@ -0,0 +1,87 @@ + + + + + + + +Cutlass: Namespace Members + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    + + + + diff --git a/docs/generated-html/namespacenv__std.html b/docs/generated-html/namespacenv__std.html new file mode 100644 index 0000000000..5eb2f549e8 --- /dev/null +++ b/docs/generated-html/namespacenv__std.html @@ -0,0 +1,934 @@ + + + + + + + +Cutlass: nv_std Namespace Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + +
    +
    + +
    +
    nv_std Namespace Reference
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  aligned_chunk
     
    struct  aligned_storage
     std::aligned_storage More...
     
    struct  alignment_of
     std::alignment_of More...
     
    struct  alignment_of< const value_t >
     
    struct  alignment_of< const volatile value_t >
     
    struct  alignment_of< double2 >
     
    struct  alignment_of< double4 >
     
    struct  alignment_of< float4 >
     
    struct  alignment_of< int4 >
     
    struct  alignment_of< long4 >
     
    struct  alignment_of< longlong2 >
     
    struct  alignment_of< longlong4 >
     
    struct  alignment_of< uint4 >
     
    struct  alignment_of< ulong4 >
     
    struct  alignment_of< ulonglong2 >
     
    struct  alignment_of< ulonglong4 >
     
    struct  alignment_of< volatile value_t >
     
    struct  bool_constant
     std::bool_constant More...
     
    struct  conditional
     std::conditional (true specialization) More...
     
    struct  conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  default_delete
     Default deleter. More...
     
    struct  default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    struct  enable_if
     std::enable_if (true specialization) More...
     
    struct  enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  greater
     std::greater More...
     
    struct  integral_constant
     std::integral_constant More...
     
    struct  is_arithmetic
     std::is_arithmetic More...
     
    struct  is_base_of
     std::is_base_of More...
     
    struct  is_base_of_helper
     Helper for std::is_base_of. More...
     
    struct  is_floating_point
     std::is_floating_point More...
     
    struct  is_fundamental
     std::is_fundamental More...
     
    struct  is_integral
     std::is_integral More...
     
    struct  is_integral< char >
     
    struct  is_integral< const T >
     
    struct  is_integral< const volatile T >
     
    struct  is_integral< int >
     
    struct  is_integral< long >
     
    struct  is_integral< long long >
     
    struct  is_integral< short >
     
    struct  is_integral< signed char >
     
    struct  is_integral< unsigned char >
     
    struct  is_integral< unsigned int >
     
    struct  is_integral< unsigned long >
     
    struct  is_integral< unsigned long long >
     
    struct  is_integral< unsigned short >
     
    struct  is_integral< volatile T >
     
    struct  is_pointer
     std::is_pointer More...
     
    struct  is_pointer_helper
     Helper for std::is_pointer (false specialization) More...
     
    struct  is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  is_same
     std::is_same (false specialization) More...
     
    struct  is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  is_trivially_copyable
     
    struct  is_void
     std::is_void More...
     
    struct  is_volatile
     std::is_volatile More...
     
    struct  is_volatile< volatile T >
     
    struct  less
     std::less More...
     
    struct  nullptr_t
     std::nullptr_t More...
     
    struct  plus
     std::plus More...
     
    struct  remove_const
     std::remove_const (non-const specialization) More...
     
    struct  remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  remove_cv
     std::remove_cv More...
     
    struct  remove_volatile
     std::remove_volatile (non-volatile specialization) More...
     
    struct  remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    class  unique_ptr
     std::unique_ptr More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair (T1 t, T2 u)
     
    template<>
    struct __align__ (1) aligned_chunk< 1 >
     
    template<>
    struct __align__ (2) aligned_chunk< 2 >
     
    template<>
    struct __align__ (4) aligned_chunk< 4 >
     
    template<>
    struct __align__ (8) aligned_chunk< 8 >
     
    template<>
    struct __align__ (16) aligned_chunk< 16 >
     
    template<>
    struct __align__ (32) aligned_chunk< 32 >
     
    template<>
    struct __align__ (64) aligned_chunk< 64 >
     
    template<>
    struct __align__ (128) aligned_chunk< 128 >
     
    template<>
    struct __align__ (256) aligned_chunk< 256 >
     
    template<>
    struct __align__ (512) aligned_chunk< 512 >
     
    template<>
    struct __align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct __align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct __align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Typedef Documentation

    + +

    ◆ false_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, false> nv_std::false_type
    +
    + +
    +
    + +

    ◆ true_type

    + +
    +
    + + + + +
    typedef integral_constant<bool, true> nv_std::true_type
    +
    + +
    +
    +

    Function Documentation

    + +

    ◆ __align__() [1/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [2/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [3/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (64 )
    +
    + +
    +
    + +

    ◆ __align__() [4/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (128 )
    +
    + +
    +
    + +

    ◆ __align__() [5/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (256 )
    +
    + +
    +
    + +

    ◆ __align__() [6/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (512 )
    +
    + +
    +
    + +

    ◆ __align__() [7/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (1024 )
    +
    + +
    +
    + +

    ◆ __align__() [8/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (2048 )
    +
    + +
    +
    + +

    ◆ __align__() [9/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (4096 )
    +
    + +
    +
    + +

    ◆ __align__() [10/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (32 )
    +
    + +
    +
    + +

    ◆ __align__() [11/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [12/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ ()
    +
    + +
    +
    + +

    ◆ __align__() [13/13]

    + +
    +
    +
    +template<>
    + + + + + + + + +
    struct nv_std::__align__ (16 )
    +
    + +
    +
    + +

    ◆ make_pair()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE std::pair<T1, T2> nv_std::make_pair (T1 t,
    T2 u 
    )
    +
    + +
    +
    + +

    ◆ max()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& nv_std::max (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ min()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr const T& nv_std::min (const T & a,
    const T & b 
    )
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator!= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator< (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator<=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator<= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator== (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator> (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ operator>=()

    + +
    +
    +
    +template<class T1 , class T2 >
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator>= (const pair< T1, T2 > & lhs,
    const pair< T1, T2 > & rhs 
    )
    +
    + +
    +
    + +

    ◆ swap()

    + +
    +
    +
    +template<typename T , typename Deleter >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    void nv_std::swap (unique_ptr< T, Deleter > & lhs,
    unique_ptr< T, Deleter > & rhs 
    )
    +
    +noexcept
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/namespaces.html b/docs/generated-html/namespaces.html new file mode 100644 index 0000000000..b12cce12b0 --- /dev/null +++ b/docs/generated-html/namespaces.html @@ -0,0 +1,90 @@ + + + + + + + +Cutlass: Namespace List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + +
    + +
    +
    + + +
    + +
    + +
    +
    +
    Namespace List
    +
    +
    +
    Here is a list of all namespaces with brief descriptions:
    +
    [detail level 12]
    + + + +
     Ncutlass
     Ngemm
     Nplatform
    +
    +
    + + + + diff --git a/docs/generated-html/nav_f.png b/docs/generated-html/nav_f.png new file mode 100644 index 0000000000000000000000000000000000000000..2a434079512b027c35c8645e5c28588a26d7037a GIT binary patch literal 154 zcmeAS@N?(olHy`uVBq!ia0vp^j6iI`!2~2XGqLUlQi+}}jv*C{Z|6GlwJPwqw13;S zu19GL>-oQ*A2V%~R_1;z7^jw>#@G1&qw<2nyHiiEYq?g%6}|VwyC~u8n}5{BBE6pE ztggP+I=Qr@?0(8KRezQT!i- + + + + + + +Cutlass: nv_std.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std.h File Reference
    +
    +
    + +

    C++ features that may be otherwise unimplemented for CUDA device functions. +More...

    +
    #include <stdint.h>
    +#include <algorithm>
    +#include <cstddef>
    +#include <functional>
    +#include <utility>
    +#include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  nv_std::plus< T >
     std::plus More...
     
    struct  nv_std::less< T >
     std::less More...
     
    struct  nv_std::greater< T >
     std::greater More...
     
    struct  nv_std::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  nv_std::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  nv_std::bool_constant< V >
     std::bool_constant More...
     
    struct  nv_std::nullptr_t
     std::nullptr_t More...
     
    struct  nv_std::enable_if< C, T >
     std::enable_if (true specialization) More...
     
    struct  nv_std::enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  nv_std::conditional< B, T, F >
     std::conditional (true specialization) More...
     
    struct  nv_std::conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  nv_std::remove_const< T >
     std::remove_const (non-const specialization) More...
     
    struct  nv_std::remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  nv_std::remove_volatile< T >
     std::remove_volatile (non-volatile specialization) More...
     
    struct  nv_std::remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    struct  nv_std::remove_cv< T >
     std::remove_cv More...
     
    struct  nv_std::is_same< A, B >
     std::is_same (false specialization) More...
     
    struct  nv_std::is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  nv_std::is_base_of_helper< BaseT, DerivedT >
     Helper for std::is_base_of. More...
     
    struct  nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     
    struct  nv_std::is_base_of< BaseT, DerivedT >
     std::is_base_of More...
     
    struct  nv_std::is_volatile< T >
     std::is_volatile More...
     
    struct  nv_std::is_volatile< volatile T >
     
    struct  nv_std::is_pointer_helper< T >
     Helper for std::is_pointer (false specialization) More...
     
    struct  nv_std::is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  nv_std::is_pointer< T >
     std::is_pointer More...
     
    struct  nv_std::is_void< T >
     std::is_void More...
     
    struct  nv_std::is_integral< T >
     std::is_integral More...
     
    struct  nv_std::is_integral< char >
     
    struct  nv_std::is_integral< signed char >
     
    struct  nv_std::is_integral< unsigned char >
     
    struct  nv_std::is_integral< short >
     
    struct  nv_std::is_integral< unsigned short >
     
    struct  nv_std::is_integral< int >
     
    struct  nv_std::is_integral< unsigned int >
     
    struct  nv_std::is_integral< long >
     
    struct  nv_std::is_integral< unsigned long >
     
    struct  nv_std::is_integral< long long >
     
    struct  nv_std::is_integral< unsigned long long >
     
    struct  nv_std::is_integral< volatile T >
     
    struct  nv_std::is_integral< const T >
     
    struct  nv_std::is_integral< const volatile T >
     
    struct  nv_std::is_floating_point< T >
     std::is_floating_point More...
     
    struct  nv_std::is_arithmetic< T >
     std::is_arithmetic More...
     
    struct  nv_std::is_fundamental< T >
     std::is_fundamental More...
     
    struct  nv_std::is_trivially_copyable< T >
     
    struct  nv_std::alignment_of< value_t >
     std::alignment_of More...
     
    struct  nv_std::alignment_of< value_t >::pad
     
    struct  nv_std::alignment_of< int4 >
     
    struct  nv_std::alignment_of< uint4 >
     
    struct  nv_std::alignment_of< float4 >
     
    struct  nv_std::alignment_of< long4 >
     
    struct  nv_std::alignment_of< ulong4 >
     
    struct  nv_std::alignment_of< longlong2 >
     
    struct  nv_std::alignment_of< ulonglong2 >
     
    struct  nv_std::alignment_of< double2 >
     
    struct  nv_std::alignment_of< longlong4 >
     
    struct  nv_std::alignment_of< ulonglong4 >
     
    struct  nv_std::alignment_of< double4 >
     
    struct  nv_std::alignment_of< volatile value_t >
     
    struct  nv_std::alignment_of< const value_t >
     
    struct  nv_std::alignment_of< const volatile value_t >
     
    struct  nv_std::aligned_chunk< Align >
     
    struct  nv_std::aligned_storage< Len, Align >
     std::aligned_storage More...
     
    struct  nv_std::default_delete< T >
     Default deleter. More...
     
    struct  nv_std::default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    class  nv_std::unique_ptr< T, Deleter >
     std::unique_ptr More...
     
    + + + +

    +Namespaces

     nv_std
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Macros

    #define noexcept
     noexcept, constexpr More...
     
    #define constexpr
     
    #define nullptr   0
     nullptr More...
     
    #define __nv_std_cat_(a, b)   a##b
     static_assert More...
     
    #define __nv_std_cat(a, b)   __nv_std_cat_(a, b)
     
    #define static_assert(__e, __m)   typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
     
    #define __NV_STD_MAX(a, b)   (((b) > (a)) ? (b) : (a))
     Select maximum(a, b) More...
     
    #define __NV_STD_MIN(a, b)   (((b) < (a)) ? (b) : (a))
     Select minimum(a, b) More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > nv_std::true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > nv_std::false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & nv_std::min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & nv_std::max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool nv_std::operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > nv_std::make_pair (T1 t, T2 u)
     
    template<>
    struct nv_std::__align__ (1) aligned_chunk< 1 >
     
    template<>
    struct nv_std::__align__ (2) aligned_chunk< 2 >
     
    template<>
    struct nv_std::__align__ (4) aligned_chunk< 4 >
     
    template<>
    struct nv_std::__align__ (8) aligned_chunk< 8 >
     
    template<>
    struct nv_std::__align__ (16) aligned_chunk< 16 >
     
    template<>
    struct nv_std::__align__ (32) aligned_chunk< 32 >
     
    template<>
    struct nv_std::__align__ (64) aligned_chunk< 64 >
     
    template<>
    struct nv_std::__align__ (128) aligned_chunk< 128 >
     
    template<>
    struct nv_std::__align__ (256) aligned_chunk< 256 >
     
    template<>
    struct nv_std::__align__ (512) aligned_chunk< 512 >
     
    template<>
    struct nv_std::__align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct nv_std::__align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct nv_std::__align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void nv_std::swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Detailed Description

    +

    This file has four components:

    +

    (1) Macros:

      +
    • Empty macro defines for C++ keywords not supported by the current version of C++. These simply allow compilation to proceed (but do not provide the added semantics).
        +
      • noexcept
      • +
      • constexpr
      • +
      • nullptr
      • +
      • static_assert
      • +
      +
    • +
    • Macro functions that we need in constant expressions because the C++ equivalents require constexpr compiler support. These are prefixed with __NV_STD_*
        +
      • __NV_STD_MAX
      • +
      • __NV_STD_MIN
      • +
      +
    • +
    +

    (2) Re-implementations of STL functions and types:

      +
    • C++ features that need the device annotation. These are placed into the nv_std namespace.
        +
      • plus
      • +
      • less
      • +
      • greater
      • +
      • min
      • +
      • max
      • +
      • methods on std::pair (==, !=, <, <=, >, >=, and make_pair())
      • +
      +
    • +
    +

    (3) Stop-gap implementations of unsupported STL functions and types:

      +
    • STL functions and types defined by C++ 11/14/17/etc. that are not provided by the current version of C++. These are placed into the nv_std namespace.
        +
      • integral_constant
      • +
      • nullptr_t
      • +
      • true_type
      • +
      • false_type
      • +
      • bool_constant
      • +
      • enable_if
      • +
      • conditional
      • +
      • is_same
      • +
      • is_base_of
      • +
      • remove_const
      • +
      • remove_volatile
      • +
      • remove_cv
      • +
      • is_volatile
      • +
      • is_pointer
      • +
      • is_void
      • +
      • is_integral
      • +
      • is_floating_point
      • +
      • is_arithmetic
      • +
      • is_fundamental
      • +
      • is_trivially_copyable
      • +
      • alignment_of
      • +
      • aligned_storage
      • +
      +
    • +
    +

    (4) Functions and types that are STL-like (but aren't in the STL):

      +
    • TODO: min and max functors?
    • +
    +

    The idea is that, as we drop support for older compilers, we can simply #define the __NV_STD_XYZ macros and nv_std namespace to alias their C++ counterparts (or trivially find-and-replace their occurrences in code text).

    +

    Macro Definition Documentation

    + +

    ◆ __nv_std_cat

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __nv_std_cat( a,
     
    )   __nv_std_cat_(a, b)
    +
    + +
    +
    + +

    ◆ __nv_std_cat_

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __nv_std_cat_( a,
     
    )   a##b
    +
    + +
    +
    + +

    ◆ __NV_STD_MAX

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MAX( a,
     
    )   (((b) > (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __NV_STD_MIN

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MIN( a,
     
    )   (((b) < (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ constexpr

    + +
    +
    + + + + +
    #define constexpr
    +
    + +
    +
    + +

    ◆ noexcept

    + +
    +
    + + + + +
    #define noexcept
    +
    + +
    +
    + +

    ◆ nullptr

    + +
    +
    + + + + +
    #define nullptr   0
    +
    + +
    +
    + +

    ◆ static_assert

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define static_assert( __e,
     __m 
    )   typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/nv__std_8h_source.html b/docs/generated-html/nv__std_8h_source.html new file mode 100644 index 0000000000..a28a327cff --- /dev/null +++ b/docs/generated-html/nv__std_8h_source.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: nv_std.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    25 
    26 #pragma once
    27 
    94 //-----------------------------------------------------------------------------
    95 // Dependencies
    96 //-----------------------------------------------------------------------------
    97 
    98 #include <stdint.h>
    99 
    100 #if !defined(__CUDACC_RTC__)
    101 //-----------------------------------------------------------------------------
    102 // Include STL files that nv_std provides functionality for
    103 //-----------------------------------------------------------------------------
    104 
    105 #include <algorithm> // Minimum/maximum operations
    106 #include <cstddef> // nullptr_t
    107 #include <functional> // Arithmetic operations
    108 #include <utility> // For methods on std::pair
    109 #if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MS_VER >= 1500))
    110 #include <type_traits> // For integral constants, conditional metaprogramming, and type traits
    111 #endif
    112 
    113 #include <cutlass/cutlass.h>
    114 
    115 #endif
    116 /******************************************************************************
    117  * Macros
    118  ******************************************************************************/
    119 //-----------------------------------------------------------------------------
    120 // Keywords
    121 //-----------------------------------------------------------------------------
    122 
    124 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    125 #ifndef noexcept
    126 #define noexcept
    127 #endif
    128 #ifndef constexpr
    129 #define constexpr
    130 #endif
    131 #endif
    132 
    134 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310))
    135 #ifndef nullptr
    136 #define nullptr 0
    137 #endif
    138 #endif
    139 
    141 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    142 #ifndef static_assert
    143 #define __nv_std_cat_(a, b) a##b
    144 #define __nv_std_cat(a, b) __nv_std_cat_(a, b)
    145 #define static_assert(__e, __m) typedef int __nv_std_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    146 #endif
    147 #endif
    148 
    149 //-----------------------------------------------------------------------------
    150 // Functions
    151 //-----------------------------------------------------------------------------
    152 
    154 #ifndef __NV_STD_MAX
    155 #define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a))
    156 #endif
    157 
    159 #ifndef __NV_STD_MIN
    160 #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a))
    161 #endif
    162 
    163 /******************************************************************************
    164  * Re-implementations
    165  ******************************************************************************/
    166 
    167 namespace nv_std {
    168 
    169 //-----------------------------------------------------------------------------
    170 // Arithmetic operations, comparisons <functional>
    171 //-----------------------------------------------------------------------------
    172 
    174 template <typename T>
    175 struct plus {
    176  CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; }
    177 };
    178 
    180 template <typename T>
    181 struct less {
    182  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    183  return lhs < rhs;
    184  }
    185 };
    186 
    188 template <typename T>
    189 struct greater {
    190  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    191  return lhs > rhs;
    192  }
    193 };
    194 
    195 //-----------------------------------------------------------------------------
    196 // Minimum/maximum operations <algorithm>
    197 //-----------------------------------------------------------------------------
    198 
    200 template <typename T>
    201 CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) {
    202  return (b < a) ? b : a;
    203 }
    204 
    206 template <typename T>
    207 CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) {
    208  return (a < b) ? b : a;
    209 }
    210 
    211 #if !defined(__CUDACC_RTC__)
    212 //-----------------------------------------------------------------------------
    213 // Methods on std::pair
    214 //-----------------------------------------------------------------------------
    215 
    216 using std::pair;
    217 
    218 template <class T1, class T2>
    219 CUTLASS_HOST_DEVICE constexpr bool operator==(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    220  return (lhs.first == rhs.first) && (lhs.second == rhs.second);
    221 }
    222 
    223 template <class T1, class T2>
    224 CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    225  return (lhs.first != rhs.first) && (lhs.second != rhs.second);
    226 }
    227 
    228 template <class T1, class T2>
    229 CUTLASS_HOST_DEVICE constexpr bool operator<(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    230  return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false
    231  : (lhs.second < rhs.second);
    232 }
    233 
    234 template <class T1, class T2>
    235 CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    236  return !(rhs < lhs);
    237 }
    238 
    239 template <class T1, class T2>
    240 CUTLASS_HOST_DEVICE constexpr bool operator>(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    241  return (rhs < lhs);
    242 }
    243 
    244 template <class T1, class T2>
    245 CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    246  return !(lhs < rhs);
    247 }
    248 
    249 template <class T1, class T2>
    250 CUTLASS_HOST_DEVICE std::pair<T1, T2> make_pair(T1 t, T2 u) {
    251  std::pair<T1, T2> retval;
    252  retval.first = t;
    253  retval.second = u;
    254  return retval;
    255 }
    256 #endif
    257 
    258 } // namespace nv_std
    259 
    260 /******************************************************************************
    261  * Implementations of C++ 11/14/17/... STL features
    262  ******************************************************************************/
    263 
    264 namespace nv_std {
    265 
    266 //-----------------------------------------------------------------------------
    267 // Integral constant helper types <type_traits>
    268 //-----------------------------------------------------------------------------
    269 
    270 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    271 
    273 template <typename value_t, value_t V>
    275 
    277 template <typename value_t, value_t V>
    278 struct integral_constant {
    279  static const value_t value = V;
    280 
    281  typedef value_t value_type;
    283 
    284  CUTLASS_HOST_DEVICE operator value_type() const { return value; }
    285 
    286  CUTLASS_HOST_DEVICE const value_type operator()() const { return value; }
    287 };
    288 
    289 #else
    290 
    291 using std::integral_constant;
    292 using std::pair;
    293 
    294 #endif
    295 
    298 
    301 
    302 #if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    303 
    305 template <bool V>
    307 
    308 #else
    309 
    310 using std::bool_constant;
    311 
    312 #endif
    313 
    314 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700))
    315 
    317 struct nullptr_t {};
    318 
    319 #else
    320 
    321 using std::nullptr_t;
    322 
    323 #endif
    324 
    325 //-----------------------------------------------------------------------------
    326 // Conditional metaprogramming <type_traits>
    327 //-----------------------------------------------------------------------------
    328 
    329 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    330 
    332 template <bool C, typename T = void>
    333 struct enable_if {
    334  typedef T type;
    335 };
    336 
    338 template <typename T>
    339 struct enable_if<false, T> {};
    340 
    342 template <bool B, class T, class F>
    343 struct conditional {
    344  typedef T type;
    345 };
    346 
    348 template <class T, class F>
    349 struct conditional<false, T, F> {
    350  typedef F type;
    351 };
    352 
    353 #else
    354 
    355 using std::enable_if;
    356 using std::conditional;
    357 
    358 #endif
    359 
    360 //-----------------------------------------------------------------------------
    361 // Const/volatility specifiers <type_traits>
    362 //-----------------------------------------------------------------------------
    363 
    364 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    365 
    367 template <typename T>
    368 struct remove_const {
    369  typedef T type;
    370 };
    371 
    373 template <typename T>
    374 struct remove_const<const T> {
    375  typedef T type;
    376 };
    377 
    379 template <typename T>
    381  typedef T type;
    382 };
    383 
    385 template <typename T>
    386 struct remove_volatile<volatile T> {
    387  typedef T type;
    388 };
    389 
    391 template <typename T>
    392 struct remove_cv {
    394 };
    395 
    396 #else
    397 
    398 using std::remove_const;
    399 using std::remove_volatile;
    400 using std::remove_cv;
    401 
    402 #endif
    403 
    404 //-----------------------------------------------------------------------------
    405 // Type relationships <type_traits>
    406 //-----------------------------------------------------------------------------
    407 
    408 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    409 
    411 template <typename A, typename B>
    412 struct is_same : false_type {};
    413 
    415 template <typename A>
    416 struct is_same<A, A> : true_type {};
    417 
    419 template <typename BaseT, typename DerivedT>
    421  typedef char (&yes)[1];
    422  typedef char (&no)[2];
    423 
    424  template <typename B, typename D>
    425  struct dummy {
    426  CUTLASS_HOST_DEVICE operator B*() const;
    427  CUTLASS_HOST_DEVICE operator D*();
    428  };
    429 
    430  template <typename T>
    431  CUTLASS_HOST_DEVICE static yes check(DerivedT*, T);
    432 
    433  CUTLASS_HOST_DEVICE static no check(BaseT*, int);
    434 
    435  static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes);
    436 };
    437 
    439 template <typename BaseT, typename DerivedT>
    441  : integral_constant<bool, (is_base_of_helper<typename remove_cv<BaseT>::type,
    442  typename remove_cv<DerivedT>::type>::value) ||
    443  (is_same<typename remove_cv<BaseT>::type,
    444  typename remove_cv<DerivedT>::type>::value)> {};
    445 
    446 #else
    447 
    448 using std::is_same;
    449 using std::is_base_of;
    450 
    451 #endif
    452 
    453 //-----------------------------------------------------------------------------
    454 // Type properties <type_traits>
    455 //-----------------------------------------------------------------------------
    456 
    457 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    458 
    460 template <typename T>
    462 template <typename T>
    463 struct is_volatile<volatile T> : true_type {};
    464 
    466 template <typename T>
    468 
    470 template <typename T>
    471 struct is_pointer_helper<T*> : true_type {};
    472 
    474 template <typename T>
    475 struct is_pointer : is_pointer_helper<typename remove_cv<T>::type> {};
    476 
    478 template <typename T>
    479 struct is_void : is_same<void, typename remove_cv<T>::type> {};
    480 
    482 template <typename T>
    484 template <>
    485 struct is_integral<char> : true_type {};
    486 template <>
    487 struct is_integral<signed char> : true_type {};
    488 template <>
    489 struct is_integral<unsigned char> : true_type {};
    490 template <>
    491 struct is_integral<short> : true_type {};
    492 template <>
    493 struct is_integral<unsigned short> : true_type {};
    494 template <>
    495 struct is_integral<int> : true_type {};
    496 template <>
    497 struct is_integral<unsigned int> : true_type {};
    498 template <>
    499 struct is_integral<long> : true_type {};
    500 template <>
    501 struct is_integral<unsigned long> : true_type {};
    502 template <>
    503 struct is_integral<long long> : true_type {};
    504 template <>
    505 struct is_integral<unsigned long long> : true_type {};
    506 template <typename T>
    507 struct is_integral<volatile T> : is_integral<T> {};
    508 template <typename T>
    509 struct is_integral<const T> : is_integral<T> {};
    510 template <typename T>
    511 struct is_integral<const volatile T> : is_integral<T> {};
    512 
    514 template <typename T>
    516  : integral_constant<bool, (is_same<float, typename remove_cv<T>::type>::value ||
    517  is_same<double, typename remove_cv<T>::type>::value)> {};
    518 
    520 template <typename T>
    522  : integral_constant<bool, (is_integral<T>::value || is_floating_point<T>::value)> {};
    523 
    525 template <typename T>
    527  : integral_constant<bool, (is_arithmetic<T>::value || is_void<T>::value ||
    528  is_same<nullptr_t, typename remove_cv<T>::type>::value)> {};
    529 
    530 #else
    531 
    532 using std::is_volatile;
    533 using std::is_pointer;
    534 using std::is_void;
    535 using std::is_integral;
    536 using std::is_floating_point;
    537 using std::is_arithmetic;
    538 using std::is_fundamental;
    539 
    540 #endif
    541 
    542 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
    543  (defined(__GNUG__) && (__GNUC__ < 5))
    544 
    555 template <typename T>
    557  : integral_constant<bool, (is_fundamental<T>::value || is_pointer<T>::value)> {};
    558 
    559 #else
    560 
    561 using std::is_trivially_copyable;
    562 
    563 #endif
    564 
    565 //-----------------------------------------------------------------------------
    566 // Alignment and layout utilities
    567 //-----------------------------------------------------------------------------
    568 
    569 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    570 
    572 template <typename value_t>
    573 struct alignment_of {
    574  struct pad {
    575  value_t val;
    576  char byte;
    577  };
    578 
    579  enum { value = sizeof(pad) - sizeof(value_t) };
    580 };
    581 
    582 #else
    583 
    584 template <typename value_t>
    585 struct alignment_of : std::alignment_of<value_t> {};
    586 
    587 #endif
    588 
    589 /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */
    590 template <>
    591 struct alignment_of<int4> {
    592  enum { value = 16 };
    593 };
    594 template <>
    595 struct alignment_of<uint4> {
    596  enum { value = 16 };
    597 };
    598 template <>
    599 struct alignment_of<float4> {
    600  enum { value = 16 };
    601 };
    602 template <>
    603 struct alignment_of<long4> {
    604  enum { value = 16 };
    605 };
    606 template <>
    607 struct alignment_of<ulong4> {
    608  enum { value = 16 };
    609 };
    610 template <>
    611 struct alignment_of<longlong2> {
    612  enum { value = 16 };
    613 };
    614 template <>
    615 struct alignment_of<ulonglong2> {
    616  enum { value = 16 };
    617 };
    618 template <>
    619 struct alignment_of<double2> {
    620  enum { value = 16 };
    621 };
    622 template <>
    623 struct alignment_of<longlong4> {
    624  enum { value = 16 };
    625 };
    626 template <>
    627 struct alignment_of<ulonglong4> {
    628  enum { value = 16 };
    629 };
    630 template <>
    631 struct alignment_of<double4> {
    632  enum { value = 16 };
    633 };
    634 
    635 // Specializations for volatile/const qualified types
    636 template <typename value_t>
    637 struct alignment_of<volatile value_t> : alignment_of<value_t> {};
    638 template <typename value_t>
    639 struct alignment_of<const value_t> : alignment_of<value_t> {};
    640 template <typename value_t>
    641 struct alignment_of<const volatile value_t> : alignment_of<value_t> {};
    642 
    643 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800))
    644 
    645 template <size_t Align>
    647 template <>
    648 struct __align__(1) aligned_chunk<1> {
    649  uint8_t buff;
    650 };
    651 template <>
    652 struct __align__(2) aligned_chunk<2> {
    653  uint16_t buff;
    654 };
    655 template <>
    656 struct __align__(4) aligned_chunk<4> {
    657  uint32_t buff;
    658 };
    659 template <>
    660 struct __align__(8) aligned_chunk<8> {
    661  uint32_t buff[2];
    662 };
    663 template <>
    664 struct __align__(16) aligned_chunk<16> {
    665  uint32_t buff[4];
    666 };
    667 template <>
    668 struct __align__(32) aligned_chunk<32> {
    669  uint32_t buff[8];
    670 };
    671 template <>
    672 struct __align__(64) aligned_chunk<64> {
    673  uint32_t buff[16];
    674 };
    675 template <>
    676 struct __align__(128) aligned_chunk<128> {
    677  uint32_t buff[32];
    678 };
    679 template <>
    680 struct __align__(256) aligned_chunk<256> {
    681  uint32_t buff[64];
    682 };
    683 template <>
    684 struct __align__(512) aligned_chunk<512> {
    685  uint32_t buff[128];
    686 };
    687 template <>
    688 struct __align__(1024) aligned_chunk<1024> {
    689  uint32_t buff[256];
    690 };
    691 template <>
    692 struct __align__(2048) aligned_chunk<2048> {
    693  uint32_t buff[512];
    694 };
    695 template <>
    696 struct __align__(4096) aligned_chunk<4096> {
    697  uint32_t buff[1024];
    698 };
    699 
    701 template <size_t Len, size_t Align>
    704 };
    705 
    706 #else
    707 
    708 using std::aligned_storage;
    709 
    710 #endif
    711 
    712 #if !defined(__CUDACC_RTC__)
    713 template <typename T>
    716  void operator()(T* ptr) const { delete ptr; }
    717 };
    718 
    720 template <typename T>
    721 struct default_delete<T[]> {
    722  void operator()(T* ptr) const { delete[] ptr; }
    723 };
    724 
    726 template <class T, class Deleter = nv_std::default_delete<T> >
    727 class unique_ptr {
    728  public:
    729  typedef T* pointer;
    730  typedef T element_type;
    731  typedef Deleter deleter_type;
    732 
    733  private:
    735  pointer _ptr;
    736 
    738  deleter_type _deleter;
    739 
    740  public:
    741  unique_ptr() : _ptr(nullptr) {}
    742  unique_ptr(pointer p) : _ptr(p) {}
    743 
    745  if (_ptr) {
    746  _deleter(_ptr);
    747  }
    748  }
    750  pointer get() const noexcept { return _ptr; }
    751 
    754  pointer p(_ptr);
    755  _ptr = nullptr;
    756  return p;
    757  }
    758 
    761  pointer old_ptr = _ptr;
    762  _ptr = p;
    763  if (old_ptr != nullptr) {
    764  get_deleter()(old_ptr);
    765  }
    766  }
    767 
    769  void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); }
    770 
    772  Deleter& get_deleter() noexcept { return _deleter; }
    773 
    775  Deleter const& get_deleter() const noexcept { return _deleter; }
    776 
    778  operator bool() const noexcept { return _ptr != nullptr; }
    779 
    781  T& operator*() const { return *_ptr; }
    782 
    784  pointer operator->() const noexcept { return _ptr; }
    785 
    787  T& operator[](size_t i) const { return _ptr[i]; }
    788 };
    789 
    791 template <typename T, typename Deleter>
    793  lhs.swap(rhs);
    794 }
    795 #endif
    796 
    797 }; // namespace nv_std
    std::unique_ptr
    Definition: nv_std.h:727
    +
    Definition: nv_std.h:574
    +
    void reset(pointer p=pointer()) noexcept
    Replaces the managed object, deleting the old object.
    Definition: nv_std.h:760
    +
    static const bool value
    Definition: nv_std.h:435
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:240
    +
    std::conditional (true specialization)
    Definition: nv_std.h:343
    +
    T type
    Definition: nv_std.h:344
    +
    value_t value_type
    Definition: nv_std.h:281
    +
    Deleter & get_deleter() noexcept
    Returns the deleter object.
    Definition: nv_std.h:772
    +
    pointer release() noexcept
    Releases ownership of the managed object, if any.
    Definition: nv_std.h:753
    +
    T type
    Definition: nv_std.h:334
    +
    integral_constant< bool, false > false_type
    The type used as a compile-time boolean with false value.
    Definition: nv_std.h:300
    +
    std::is_pointer
    Definition: nv_std.h:475
    +
    pointer operator->() const noexcept
    Returns a pointer to the managed object.
    Definition: nv_std.h:784
    +
    std::alignment_of
    Definition: nv_std.h:573
    +
    Definition: nv_std.h:556
    +
    integral_constant< value_t, V > type
    Definition: nv_std.h:282
    +
    char byte
    Definition: nv_std.h:576
    +
    Definition: nv_std.h:579
    +
    Helper for std::is_pointer (false specialization)
    Definition: nv_std.h:467
    +
    T & operator[](size_t i) const
    Array access to managed object.
    Definition: nv_std.h:787
    +
    char(& no)[2]
    Definition: nv_std.h:422
    +
    std::less
    Definition: nv_std.h:181
    +
    Deleter deleter_type
    Definition: nv_std.h:731
    +
    value_t val
    Definition: nv_std.h:575
    +
    #define constexpr
    Definition: nv_std.h:129
    +
    std::remove_volatile (non-volatile specialization)
    Definition: nv_std.h:380
    +
    static const value_t value
    Definition: nv_std.h:279
    +
    CUTLASS_HOST_DEVICE constexpr bool operator==(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:219
    +
    std::remove_cv
    Definition: nv_std.h:392
    +
    std::is_base_of
    Definition: nv_std.h:440
    +
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair(T1 t, T2 u)
    Definition: nv_std.h:250
    +
    std::integral_constant
    Definition: nv_std.h:274
    +
    struct __align__(1) aligned_chunk< 1 >
    Definition: nv_std.h:648
    +
    void operator()(T *ptr) const
    Definition: nv_std.h:716
    +
    void swap(unique_ptr &other) noexcept
    Swaps the managed objects with *this and another unique_ptr.
    Definition: nv_std.h:769
    +
    std::remove_const (non-const specialization)
    Definition: nv_std.h:368
    +
    CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
    std::max
    Definition: nv_std.h:207
    +
    Definition: nv_std.h:167
    +
    char(& yes)[1]
    Definition: nv_std.h:421
    +
    T type
    Definition: nv_std.h:381
    +
    CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:224
    +
    std::is_volatile
    Definition: nv_std.h:461
    +
    T element_type
    Definition: nv_std.h:730
    +
    nv_std::plus
    Definition: nv_std.h:175
    +
    std::is_same (false specialization)
    Definition: nv_std.h:412
    +
    Default deleter.
    Definition: nv_std.h:715
    +
    T * pointer
    Definition: nv_std.h:729
    +
    Deleter const & get_deleter() const noexcept
    Returns the deleter object.
    Definition: nv_std.h:775
    +
    std::is_integral
    Definition: nv_std.h:483
    +
    Helper for std::is_base_of.
    Definition: nv_std.h:420
    +
    std::is_fundamental
    Definition: nv_std.h:526
    +
    void swap(unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
    Specializes the swap algorithm.
    Definition: nv_std.h:792
    +
    Definition: nv_std.h:425
    +
    CUTLASS_HOST_DEVICE const value_type operator()() const
    Definition: nv_std.h:286
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    #define noexcept
    noexcept, constexpr
    Definition: nv_std.h:126
    +
    std::enable_if (true specialization)
    Definition: nv_std.h:333
    +
    Definition: nv_std.h:646
    +
    unique_ptr()
    Definition: nv_std.h:741
    +
    std::greater
    Definition: nv_std.h:189
    +
    std::is_floating_point
    Definition: nv_std.h:515
    +
    #define nullptr
    nullptr
    Definition: nv_std.h:136
    +
    CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
    std::min
    Definition: nv_std.h:201
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:190
    +
    aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
    Definition: nv_std.h:703
    +
    std::nullptr_t
    Definition: nv_std.h:317
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:182
    +
    unique_ptr(pointer p)
    Definition: nv_std.h:742
    +
    std::aligned_storage
    Definition: nv_std.h:702
    +
    remove_volatile< typename remove_const< T >::type >::type type
    Definition: nv_std.h:393
    +
    integral_constant< bool, true > true_type
    The type used as a compile-time boolean with true value.
    Definition: nv_std.h:297
    +
    T & operator*() const
    Dereferences the unique_ptr.
    Definition: nv_std.h:781
    +
    std::bool_constant
    Definition: nv_std.h:306
    +
    std::is_void
    Definition: nv_std.h:479
    +
    F type
    Definition: nv_std.h:350
    +
    static CUTLASS_HOST_DEVICE yes check(DerivedT *, T)
    +
    T type
    Definition: nv_std.h:369
    +
    CUTLASS_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const
    Definition: nv_std.h:176
    +
    T type
    Definition: nv_std.h:375
    +
    Basic include for CUTLASS macros.
    +
    T type
    Definition: nv_std.h:387
    +
    void operator()(T *ptr) const
    Definition: nv_std.h:722
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: nv_std.h:245
    +
    ~unique_ptr()
    Definition: nv_std.h:744
    +
    std::is_arithmetic
    Definition: nv_std.h:521
    +
    + + + + diff --git a/docs/generated-html/open.png b/docs/generated-html/open.png new file mode 100644 index 0000000000000000000000000000000000000000..6bc64cce06383af89d746f9a0926e81baf1b2214 GIT binary patch literal 123 zcmeAS@N?(olHy`uVBq!ia0vp^oFL4>1|%O$WD@{VPM$7~Ar*{o?;hlAFyLXmaDB~H z$2&(EcXc;gOixl2Uujug^TE?jMft$}EsyW$uQ+ney;=Xfoza2lbLp9(I{Its_Asuy W!;{0H`sOjvGzL#sKbLh*2~7a4&? + + + + + + +Cutlass: platform.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    platform.h File Reference
    +
    +
    + +

    C++ features that may be otherwise unimplemented for CUDA device functions. +More...

    +
    #include <stdint.h>
    +#include <algorithm>
    +#include <cstddef>
    +#include <functional>
    +#include <utility>
    +#include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::platform::plus< T >
     platform::plus More...
     
    struct  cutlass::platform::less< T >
     std::less More...
     
    struct  cutlass::platform::greater< T >
     std::greater More...
     
    struct  cutlass::platform::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  cutlass::platform::integral_constant< value_t, V >
     std::integral_constant More...
     
    struct  cutlass::platform::bool_constant< V >
     std::bool_constant More...
     
    struct  cutlass::platform::nullptr_t
     std::nullptr_t More...
     
    struct  cutlass::platform::enable_if< C, T >
     std::enable_if (true specialization) More...
     
    struct  cutlass::platform::enable_if< false, T >
     std::enable_if (false specialization) More...
     
    struct  cutlass::platform::conditional< B, T, F >
     std::conditional (true specialization) More...
     
    struct  cutlass::platform::conditional< false, T, F >
     std::conditional (false specialization) More...
     
    struct  cutlass::platform::remove_const< T >
     std::remove_const (non-const specialization) More...
     
    struct  cutlass::platform::remove_const< const T >
     std::remove_const (const specialization) More...
     
    struct  cutlass::platform::remove_volatile< T >
     std::remove_volatile (non-volatile specialization) More...
     
    struct  cutlass::platform::remove_volatile< volatile T >
     std::remove_volatile (volatile specialization) More...
     
    struct  cutlass::platform::remove_cv< T >
     std::remove_cv More...
     
    struct  cutlass::platform::is_same< A, B >
     std::is_same (false specialization) More...
     
    struct  cutlass::platform::is_same< A, A >
     std::is_same (true specialization) More...
     
    struct  cutlass::platform::is_base_of_helper< BaseT, DerivedT >
     Helper for std::is_base_of. More...
     
    struct  cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >
     
    struct  cutlass::platform::is_base_of< BaseT, DerivedT >
     std::is_base_of More...
     
    struct  cutlass::platform::is_volatile< T >
     std::is_volatile More...
     
    struct  cutlass::platform::is_volatile< volatile T >
     
    struct  cutlass::platform::is_pointer_helper< T >
     Helper for std::is_pointer (false specialization) More...
     
    struct  cutlass::platform::is_pointer_helper< T * >
     Helper for std::is_pointer (true specialization) More...
     
    struct  cutlass::platform::is_pointer< T >
     std::is_pointer More...
     
    struct  cutlass::platform::is_void< T >
     std::is_void More...
     
    struct  cutlass::platform::is_integral< T >
     std::is_integral More...
     
    struct  cutlass::platform::is_integral< char >
     
    struct  cutlass::platform::is_integral< signed char >
     
    struct  cutlass::platform::is_integral< unsigned char >
     
    struct  cutlass::platform::is_integral< short >
     
    struct  cutlass::platform::is_integral< unsigned short >
     
    struct  cutlass::platform::is_integral< int >
     
    struct  cutlass::platform::is_integral< unsigned int >
     
    struct  cutlass::platform::is_integral< long >
     
    struct  cutlass::platform::is_integral< unsigned long >
     
    struct  cutlass::platform::is_integral< long long >
     
    struct  cutlass::platform::is_integral< unsigned long long >
     
    struct  cutlass::platform::is_integral< volatile T >
     
    struct  cutlass::platform::is_integral< const T >
     
    struct  cutlass::platform::is_integral< const volatile T >
     
    struct  cutlass::platform::is_floating_point< T >
     std::is_floating_point More...
     
    struct  cutlass::platform::is_arithmetic< T >
     std::is_arithmetic More...
     
    struct  cutlass::platform::is_fundamental< T >
     std::is_fundamental More...
     
    struct  cutlass::platform::is_trivially_copyable< T >
     
    struct  cutlass::platform::alignment_of< value_t >
     std::alignment_of More...
     
    struct  cutlass::platform::alignment_of< value_t >::pad
     
    struct  cutlass::platform::alignment_of< int4 >
     
    struct  cutlass::platform::alignment_of< uint4 >
     
    struct  cutlass::platform::alignment_of< float4 >
     
    struct  cutlass::platform::alignment_of< long4 >
     
    struct  cutlass::platform::alignment_of< ulong4 >
     
    struct  cutlass::platform::alignment_of< longlong2 >
     
    struct  cutlass::platform::alignment_of< ulonglong2 >
     
    struct  cutlass::platform::alignment_of< double2 >
     
    struct  cutlass::platform::alignment_of< longlong4 >
     
    struct  cutlass::platform::alignment_of< ulonglong4 >
     
    struct  cutlass::platform::alignment_of< double4 >
     
    struct  cutlass::platform::alignment_of< volatile value_t >
     
    struct  cutlass::platform::alignment_of< const value_t >
     
    struct  cutlass::platform::alignment_of< const volatile value_t >
     
    struct  cutlass::platform::aligned_chunk< Align >
     
    struct  cutlass::platform::aligned_storage< Len, Align >
     std::aligned_storage More...
     
    struct  cutlass::platform::default_delete< T >
     Default deleter. More...
     
    struct  cutlass::platform::default_delete< T[]>
     Partial specialization for deleting array types. More...
     
    class  cutlass::platform::unique_ptr< T, Deleter >
     std::unique_ptr More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::platform
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Macros

    #define noexcept
     noexcept, constexpr More...
     
    #define constexpr
     
    #define nullptr   0
     nullptr More...
     
    #define __platform_cat_(a, b)   a##b
     static_assert More...
     
    #define __platform_cat(a, b)   __platform_cat_(a, b)
     
    #define static_assert(__e, __m)   typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
     
    #define __NV_STD_MAX(a, b)   (((b) > (a)) ? (b) : (a))
     Select maximum(a, b) More...
     
    #define __NV_STD_MIN(a, b)   (((b) < (a)) ? (b) : (a))
     Select minimum(a, b) More...
     
    + + + + + + + +

    +Typedefs

    typedef integral_constant< bool, true > cutlass::platform::true_type
     The type used as a compile-time boolean with true value. More...
     
    typedef integral_constant< bool, false > cutlass::platform::false_type
     The type used as a compile-time boolean with false value. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & cutlass::platform::min (const T &a, const T &b)
     std::min More...
     
    template<typename T >
    CUTLASS_HOST_DEVICE constexpr const T & cutlass::platform::max (const T &a, const T &b)
     std::max More...
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator== (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator!= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator< (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator<= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator> (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::operator>= (const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
     
    template<class T1 , class T2 >
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > cutlass::platform::make_pair (T1 t, T2 u)
     
    template<>
    struct cutlass::platform::__align__ (1) aligned_chunk< 1 >
     
    template<>
    struct cutlass::platform::__align__ (2) aligned_chunk< 2 >
     
    template<>
    struct cutlass::platform::__align__ (4) aligned_chunk< 4 >
     
    template<>
    struct cutlass::platform::__align__ (8) aligned_chunk< 8 >
     
    template<>
    struct cutlass::platform::__align__ (16) aligned_chunk< 16 >
     
    template<>
    struct cutlass::platform::__align__ (32) aligned_chunk< 32 >
     
    template<>
    struct cutlass::platform::__align__ (64) aligned_chunk< 64 >
     
    template<>
    struct cutlass::platform::__align__ (128) aligned_chunk< 128 >
     
    template<>
    struct cutlass::platform::__align__ (256) aligned_chunk< 256 >
     
    template<>
    struct cutlass::platform::__align__ (512) aligned_chunk< 512 >
     
    template<>
    struct cutlass::platform::__align__ (1024) aligned_chunk< 1024 >
     
    template<>
    struct cutlass::platform::__align__ (2048) aligned_chunk< 2048 >
     
    template<>
    struct cutlass::platform::__align__ (4096) aligned_chunk< 4096 >
     
    template<typename T , typename Deleter >
    void cutlass::platform::swap (unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
     Specializes the swap algorithm. More...
     
    +

    Detailed Description

    +

    This file has four components:

    +

    (1) Macros:

      +
    • Stand-in macro definitions for C++ keywords not supported by the C++ version in use. These simply allow compilation to proceed (but do not provide the keywords' full semantics).
        +
      • noexcept
      • +
      • constexpr
      • +
      • nullptr
      • +
      • static_assert
      • +
      +
    • +
    • Function-like macros that we need in constant expressions because the C++ equivalents require constexpr compiler support. These are prefixed with __NV_STD_.
        +
      • __NV_STD_MAX
      • +
      • __NV_STD_MIN
      • +
      +
    • +
    +

    (2) Re-implementations of STL functions and types:

      +
    • C++ features that need the device annotation. These are placed into the platform namespace.
        +
      • plus
      • +
      • less
      • +
      • greater
      • +
      • min
      • +
      • max
      • +
      • non-member operators and helpers for std::pair (==, !=, <, <=, >, >=, and make_pair())
      • +
      +
    • +
    +

    (3) Stop-gap implementations of unsupported STL functions and types:

      +
    • STL functions and types defined by C++ 11/14/17/etc. that are not provided by the current version of C++. These are placed into the platform namespace.
        +
      • integral_constant
      • +
      • nullptr_t
      • +
      • true_type
      • +
      • false_type
      • +
      • bool_constant
      • +
      • enable_if
      • +
      • conditional
      • +
      • is_same
      • +
      • is_base_of
      • +
      • remove_const
      • +
      • remove_volatile
      • +
      • remove_cv
      • +
      • is_volatile
      • +
      • is_pointer
      • +
      • is_void
      • +
      • is_integral
      • +
      • is_floating_point
      • +
      • is_arithmetic
      • +
      • is_fundamental
      • +
      • is_trivially_copyable
      • +
      • alignment_of
      • +
      • aligned_storage
      • +
      +
    • +
    +

    (4) Functions and types that are STL-like (but aren't in the STL):

      +
    • TODO: min and max functors?
    • +
    +

    The idea is that, as we drop support for older compilers, we can simply #define the __NV_STD_XYZ macros and platform namespace to alias their C++ counterparts (or trivially find-and-replace their occurrences in code text).

    +

    Macro Definition Documentation

    + +

    ◆ __NV_STD_MAX

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MAX( a,
     
    )   (((b) > (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __NV_STD_MIN

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __NV_STD_MIN( a,
     
    )   (((b) < (a)) ? (b) : (a))
    +
    + +
    +
    + +

    ◆ __platform_cat

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __platform_cat( a,
     
    )   __platform_cat_(a, b)
    +
    + +
    +
    + +

    ◆ __platform_cat_

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define __platform_cat_( a,
     
    )   a##b
    +
    + +
    +
    + +

    ◆ constexpr

    + +
    +
    + + + + +
    #define constexpr
    +
    + +
    +
    + +

    ◆ noexcept

    + +
    +
    + + + + +
    #define noexcept
    +
    + +
    +
    + +

    ◆ nullptr

    + +
    +
    + + + + +
    #define nullptr   0
    +
    + +
    +
    + +

    ◆ static_assert

    + +
    +
    + + + + + + + + + + + + + + + + + + +
    #define static_assert( __e,
     __m 
    )   typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    +
    + +
    +
    +
    + + + + diff --git a/docs/generated-html/platform_8h_source.html b/docs/generated-html/platform_8h_source.html new file mode 100644 index 0000000000..9dcbacf27a --- /dev/null +++ b/docs/generated-html/platform_8h_source.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: platform.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    platform.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    25 
    26 #pragma once
    27 
    94 //-----------------------------------------------------------------------------
    95 // Dependencies
    96 //-----------------------------------------------------------------------------
    97 
    98 #include <stdint.h>
    99 
    100 #if !defined(__CUDACC_RTC__)
    101 //-----------------------------------------------------------------------------
    102 // Include STL files that platform provides functionality for
    103 //-----------------------------------------------------------------------------
    104 
    105 #include <algorithm> // Minimum/maximum operations
    106 #include <cstddef> // nullptr_t
    107 #include <functional> // Arithmetic operations
    108 #include <utility> // For methods on std::pair
    109 #if (!defined(_MSC_VER) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MSC_VER >= 1500))
    110 #include <type_traits> // For integral constants, conditional metaprogramming, and type traits
    111 #endif
    112 
    113 #include <cutlass/cutlass.h>
    114 
    115 #endif
    116 /******************************************************************************
    117  * Macros
    118  ******************************************************************************/
    119 //-----------------------------------------------------------------------------
    120 // Keywords
    121 //-----------------------------------------------------------------------------
    122 
    124 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    125 #ifndef noexcept
    126 #define noexcept
    127 #endif
    128 #ifndef constexpr
    129 #define constexpr
    130 #endif
    131 #endif
    132 
    134 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1310))
    135 #ifndef nullptr
    136 #define nullptr 0
    137 #endif
    138 #endif
    139 
    141 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    142 #ifndef static_assert
    143 #define __platform_cat_(a, b) a##b
    144 #define __platform_cat(a, b) __platform_cat_(a, b)
    145 #define static_assert(__e, __m) typedef int __platform_cat(AsSeRt, __LINE__)[(__e) ? 1 : -1]
    146 #endif
    147 #endif
    148 
    149 //-----------------------------------------------------------------------------
    150 // Functions
    151 //-----------------------------------------------------------------------------
    152 
    154 #ifndef __NV_STD_MAX
    155 #define __NV_STD_MAX(a, b) (((b) > (a)) ? (b) : (a))
    156 #endif
    157 
    159 #ifndef __NV_STD_MIN
    160 #define __NV_STD_MIN(a, b) (((b) < (a)) ? (b) : (a))
    161 #endif
    162 
    163 /******************************************************************************
    164  * Re-implementations
    165  ******************************************************************************/
    166 namespace cutlass {
    167 namespace platform {
    168 
    169 //-----------------------------------------------------------------------------
    170 // Arithmetic operations, comparisons <functional>
    171 //-----------------------------------------------------------------------------
    172 
    174 template <typename T>
    175 struct plus {
    176  CUTLASS_HOST_DEVICE constexpr T operator()(const T& lhs, const T& rhs) const { return lhs + rhs; }
    177 };
    178 
    180 template <typename T>
    181 struct less {
    182  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    183  return lhs < rhs;
    184  }
    185 };
    186 
    188 template <typename T>
    189 struct greater {
    190  CUTLASS_HOST_DEVICE constexpr bool operator()(const T& lhs, const T& rhs) const {
    191  return lhs > rhs;
    192  }
    193 };
    194 
    195 //-----------------------------------------------------------------------------
    196 // Minimum/maximum operations <algorithm>
    197 //-----------------------------------------------------------------------------
    198 
    200 template <typename T>
    201 CUTLASS_HOST_DEVICE constexpr const T& min(const T& a, const T& b) {
    202  return (b < a) ? b : a;
    203 }
    204 
    206 template <typename T>
    207 CUTLASS_HOST_DEVICE constexpr const T& max(const T& a, const T& b) {
    208  return (a < b) ? b : a;
    209 }
    210 
    211 #if !defined(__CUDACC_RTC__)
    212 //-----------------------------------------------------------------------------
    213 // Methods on std::pair
    214 //-----------------------------------------------------------------------------
    215 
    216 using std::pair;
    217 
    218 template <class T1, class T2>
    219 CUTLASS_HOST_DEVICE constexpr bool operator==(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    220  return (lhs.first == rhs.first) && (lhs.second == rhs.second);
    221 }
    222 
    223 template <class T1, class T2>
    224 CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    225  return (lhs.first != rhs.first) || (lhs.second != rhs.second);
    226 }
    227 
    228 template <class T1, class T2>
    229 CUTLASS_HOST_DEVICE constexpr bool operator<(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    230  return (lhs.first < rhs.first) ? true : (rhs.first < lhs.first) ? false
    231  : (lhs.second < rhs.second);
    232 }
    233 
    234 template <class T1, class T2>
    235 CUTLASS_HOST_DEVICE constexpr bool operator<=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    236  return !(rhs < lhs);
    237 }
    238 
    239 template <class T1, class T2>
    240 CUTLASS_HOST_DEVICE constexpr bool operator>(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    241  return (rhs < lhs);
    242 }
    243 
    244 template <class T1, class T2>
    245 CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair<T1, T2>& lhs, const pair<T1, T2>& rhs) {
    246  return !(lhs < rhs);
    247 }
    248 
    249 template <class T1, class T2>
    250 CUTLASS_HOST_DEVICE std::pair<T1, T2> make_pair(T1 t, T2 u) {
    251  std::pair<T1, T2> retval;
    252  retval.first = t;
    253  retval.second = u;
    254  return retval;
    255 }
    256 #endif
    257 
    258 } // namespace platform
    259 
    260 /******************************************************************************
    261  * Implementations of C++ 11/14/17/... STL features
    262  ******************************************************************************/
    263 
    264 namespace platform {
    265 
    266 //-----------------------------------------------------------------------------
    267 // Integral constant helper types <type_traits>
    268 //-----------------------------------------------------------------------------
    269 
    270 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    271 
    273 template <typename value_t, value_t V>
    275 
    277 template <typename value_t, value_t V>
    278 struct integral_constant {
    279  static const value_t value = V;
    280 
    281  typedef value_t value_type;
    283 
    284  CUTLASS_HOST_DEVICE operator value_type() const { return value; }
    285 
    286  CUTLASS_HOST_DEVICE const value_type operator()() const { return value; }
    287 };
    288 
    289 #else
    290 
    291 using std::integral_constant;
    292 using std::pair;
    293 
    294 #endif
    295 
    298 
    301 
    302 #if (!defined(_MSC_VER) && (__cplusplus < 201402L)) || (defined(_MSC_VER) && (_MSC_VER < 1900))
    303 
    305 template <bool V>
    307 
    308 #else
    309 
    310 using std::bool_constant;
    311 
    312 #endif
    313 
    314 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1700))
    315 
    317 struct nullptr_t {};
    318 
    319 #else
    320 
    321 using std::nullptr_t;
    322 
    323 #endif
    324 
    325 //-----------------------------------------------------------------------------
    326 // Conditional metaprogramming <type_traits>
    327 //-----------------------------------------------------------------------------
    328 
    329 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1600))
    330 
    332 template <bool C, typename T = void>
    333 struct enable_if {
    334  typedef T type;
    335 };
    336 
    338 template <typename T>
    339 struct enable_if<false, T> {};
    340 
    342 template <bool B, class T, class F>
    343 struct conditional {
    344  typedef T type;
    345 };
    346 
    348 template <class T, class F>
    349 struct conditional<false, T, F> {
    350  typedef F type;
    351 };
    352 
    353 #else
    354 
    355 using std::enable_if;
    356 using std::conditional;
    357 
    358 #endif
    359 
    360 //-----------------------------------------------------------------------------
    361 // Const/volatility specifiers <type_traits>
    362 //-----------------------------------------------------------------------------
    363 
    364 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    365 
    367 template <typename T>
    368 struct remove_const {
    369  typedef T type;
    370 };
    371 
    373 template <typename T>
    374 struct remove_const<const T> {
    375  typedef T type;
    376 };
    377 
    379 template <typename T>
    381  typedef T type;
    382 };
    383 
    385 template <typename T>
    386 struct remove_volatile<volatile T> {
    387  typedef T type;
    388 };
    389 
    391 template <typename T>
    392 struct remove_cv {
    394 };
    395 
    396 #else
    397 
    398 using std::remove_const;
    399 using std::remove_volatile;
    400 using std::remove_cv;
    401 
    402 #endif
    403 
    404 //-----------------------------------------------------------------------------
    405 // Type relationships <type_traits>
    406 //-----------------------------------------------------------------------------
    407 
    408 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    409 
    411 template <typename A, typename B>
    412 struct is_same : false_type {};
    413 
    415 template <typename A>
    416 struct is_same<A, A> : true_type {};
    417 
    419 template <typename BaseT, typename DerivedT>
    421  typedef char (&yes)[1];
    422  typedef char (&no)[2];
    423 
    424  template <typename B, typename D>
    425  struct dummy {
    426  CUTLASS_HOST_DEVICE operator B*() const;
    427  CUTLASS_HOST_DEVICE operator D*();
    428  };
    429 
    430  template <typename T>
    431  CUTLASS_HOST_DEVICE static yes check(DerivedT*, T);
    432 
    433  CUTLASS_HOST_DEVICE static no check(BaseT*, int);
    434 
    435  static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes);
    436 };
    437 
    439 template <typename BaseT, typename DerivedT>
    441  : integral_constant<bool,
    442  (is_base_of_helper<typename remove_cv<BaseT>::type,
    443  typename remove_cv<DerivedT>::type>::value) ||
    444  (is_same<typename remove_cv<BaseT>::type,
    445  typename remove_cv<DerivedT>::type>::value)> {};
    446 
    447 #else
    448 
    449 using std::is_same;
    450 using std::is_base_of;
    451 
    452 #endif
    453 
    454 //-----------------------------------------------------------------------------
    455 // Type properties <type_traits>
    456 //-----------------------------------------------------------------------------
    457 
    458 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    459 
    461 template <typename T>
    463 template <typename T>
    464 struct is_volatile<volatile T> : true_type {};
    465 
    467 template <typename T>
    469 
    471 template <typename T>
    472 struct is_pointer_helper<T*> : true_type {};
    473 
    475 template <typename T>
    476 struct is_pointer : is_pointer_helper<typename remove_cv<T>::type> {};
    477 
    479 template <typename T>
    480 struct is_void : is_same<void, typename remove_cv<T>::type> {};
    481 
    483 template <typename T>
    485 template <>
    486 struct is_integral<char> : true_type {};
    487 template <>
    488 struct is_integral<signed char> : true_type {};
    489 template <>
    490 struct is_integral<unsigned char> : true_type {};
    491 template <>
    492 struct is_integral<short> : true_type {};
    493 template <>
    494 struct is_integral<unsigned short> : true_type {};
    495 template <>
    496 struct is_integral<int> : true_type {};
    497 template <>
    498 struct is_integral<unsigned int> : true_type {};
    499 template <>
    500 struct is_integral<long> : true_type {};
    501 template <>
    502 struct is_integral<unsigned long> : true_type {};
    503 template <>
    504 struct is_integral<long long> : true_type {};
    505 template <>
    506 struct is_integral<unsigned long long> : true_type {};
    507 template <typename T>
    508 struct is_integral<volatile T> : is_integral<T> {};
    509 template <typename T>
    510 struct is_integral<const T> : is_integral<T> {};
    511 template <typename T>
    512 struct is_integral<const volatile T> : is_integral<T> {};
    513 
    515 template <typename T>
    517  : integral_constant<bool,
    518  (is_same<float, typename remove_cv<T>::type>::value ||
    519  is_same<double, typename remove_cv<T>::type>::value)> {};
    520 
    522 template <typename T>
    524  : integral_constant<bool, (is_integral<T>::value || is_floating_point<T>::value)> {};
    525 
    527 template <typename T>
    529  : integral_constant<bool,
    530  (is_arithmetic<T>::value || is_void<T>::value ||
    531  is_same<nullptr_t, typename remove_cv<T>::type>::value)> {};
    532 
    533 #else
    534 
    535 using std::is_volatile;
    536 using std::is_pointer;
    537 using std::is_void;
    538 using std::is_integral;
    539 using std::is_floating_point;
    540 using std::is_arithmetic;
    541 using std::is_fundamental;
    542 
    543 #endif
    544 
    545 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
    546  (defined(__GNUG__) && (__GNUC__ < 5))
    547 
    558 template <typename T>
    560  : integral_constant<bool, (is_fundamental<T>::value || is_pointer<T>::value)> {};
    561 
    562 #else
    563 
    564 using std::is_trivially_copyable;
    565 
    566 #endif
    567 
    568 //-----------------------------------------------------------------------------
    569 // Alignment and layout utilities
    570 //-----------------------------------------------------------------------------
    571 
    572 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1500))
    573 
    575 template <typename value_t>
    576 struct alignment_of {
    577  struct pad {
    578  value_t val;
    579  char byte;
    580  };
    581 
    582  enum { value = sizeof(pad) - sizeof(value_t) };
    583 };
    584 
    585 #else
    586 
    587 template <typename value_t>
    588 struct alignment_of : std::alignment_of<value_t> {};
    589 
    590 #endif
    591 
    592 /* 16B specializations where 32-bit Win32 host compiler disagrees with device compiler */
    593 template <>
    594 struct alignment_of<int4> {
    595  enum { value = 16 };
    596 };
    597 template <>
    598 struct alignment_of<uint4> {
    599  enum { value = 16 };
    600 };
    601 template <>
    602 struct alignment_of<float4> {
    603  enum { value = 16 };
    604 };
    605 template <>
    606 struct alignment_of<long4> {
    607  enum { value = 16 };
    608 };
    609 template <>
    610 struct alignment_of<ulong4> {
    611  enum { value = 16 };
    612 };
    613 template <>
    614 struct alignment_of<longlong2> {
    615  enum { value = 16 };
    616 };
    617 template <>
    618 struct alignment_of<ulonglong2> {
    619  enum { value = 16 };
    620 };
    621 template <>
    622 struct alignment_of<double2> {
    623  enum { value = 16 };
    624 };
    625 template <>
    626 struct alignment_of<longlong4> {
    627  enum { value = 16 };
    628 };
    629 template <>
    630 struct alignment_of<ulonglong4> {
    631  enum { value = 16 };
    632 };
    633 template <>
    634 struct alignment_of<double4> {
    635  enum { value = 16 };
    636 };
    637 
    638 // Specializations for volatile/const qualified types
    639 template <typename value_t>
    640 struct alignment_of<volatile value_t> : alignment_of<value_t> {};
    641 template <typename value_t>
    642 struct alignment_of<const value_t> : alignment_of<value_t> {};
    643 template <typename value_t>
    644 struct alignment_of<const volatile value_t> : alignment_of<value_t> {};
    645 
    646 #if (!defined(_MSC_VER) && (__cplusplus < 201103L)) || (defined(_MSC_VER) && (_MSC_VER < 1800))
    647 
    648 template <size_t Align>
    650 template <>
    651 struct __align__(1) aligned_chunk<1> {
    652  uint8_t buff;
    653 };
    654 template <>
    655 struct __align__(2) aligned_chunk<2> {
    656  uint16_t buff;
    657 };
    658 template <>
    659 struct __align__(4) aligned_chunk<4> {
    660  uint32_t buff;
    661 };
    662 template <>
    663 struct __align__(8) aligned_chunk<8> {
    664  uint32_t buff[2];
    665 };
    666 template <>
    667 struct __align__(16) aligned_chunk<16> {
    668  uint32_t buff[4];
    669 };
    670 template <>
    671 struct __align__(32) aligned_chunk<32> {
    672  uint32_t buff[8];
    673 };
    674 template <>
    675 struct __align__(64) aligned_chunk<64> {
    676  uint32_t buff[16];
    677 };
    678 template <>
    679 struct __align__(128) aligned_chunk<128> {
    680  uint32_t buff[32];
    681 };
    682 template <>
    683 struct __align__(256) aligned_chunk<256> {
    684  uint32_t buff[64];
    685 };
    686 template <>
    687 struct __align__(512) aligned_chunk<512> {
    688  uint32_t buff[128];
    689 };
    690 template <>
    691 struct __align__(1024) aligned_chunk<1024> {
    692  uint32_t buff[256];
    693 };
    694 template <>
    695 struct __align__(2048) aligned_chunk<2048> {
    696  uint32_t buff[512];
    697 };
    698 template <>
    699 struct __align__(4096) aligned_chunk<4096> {
    700  uint32_t buff[1024];
    701 };
    702 
    704 template <size_t Len, size_t Align>
    707 };
    708 
    709 #else
    710 
    711 using std::aligned_storage;
    712 
    713 #endif
    714 
    715 #if !defined(__CUDACC_RTC__)
    716 template <typename T>
    719  void operator()(T* ptr) const { delete ptr; }
    720 };
    721 
    723 template <typename T>
    724 struct default_delete<T[]> {
    725  void operator()(T* ptr) const { delete[] ptr; }
    726 };
    727 
    729 template <class T, class Deleter = default_delete<T> >
    730 class unique_ptr {
    731  public:
    732  typedef T* pointer;
    733  typedef T element_type;
    734  typedef Deleter deleter_type;
    735 
    736  private:
    738  pointer _ptr;
    739 
    741  deleter_type _deleter;
    742 
    743  public:
    744  unique_ptr() : _ptr(nullptr) {}
    745  unique_ptr(pointer p) : _ptr(p) {}
    746 
    748  if (_ptr) {
    749  _deleter(_ptr);
    750  }
    751  }
    753  pointer get() const noexcept { return _ptr; }
    754 
    757  pointer p(_ptr);
    758  _ptr = nullptr;
    759  return p;
    760  }
    761 
    764  pointer old_ptr = _ptr;
    765  _ptr = p;
    766  if (old_ptr != nullptr) {
    767  get_deleter()(old_ptr);
    768  }
    769  }
    770 
    772  void swap(unique_ptr& other) noexcept { std::swap(_ptr, other._ptr); }
    773 
    775  Deleter& get_deleter() noexcept { return _deleter; }
    776 
    778  Deleter const& get_deleter() const noexcept { return _deleter; }
    779 
    781  operator bool() const noexcept { return _ptr != nullptr; }
    782 
    784  T& operator*() const { return *_ptr; }
    785 
    787  pointer operator->() const noexcept { return _ptr; }
    788 
    790  T& operator[](size_t i) const { return _ptr[i]; }
    791 };
    792 
    794 template <typename T, typename Deleter>
    796  lhs.swap(rhs);
    797 }
    798 #endif
    799 
    800 }; // namespace platform
    801 }; // namespace cutlass
    static const value_t value
    Definition: platform.h:279
    +
    CUTLASS_HOST_DEVICE constexpr const T & max(const T &a, const T &b)
    std::max
    Definition: platform.h:207
    +
    Definition: convert.h:33
    +
    #define constexpr
    Definition: platform.h:129
    +
    std::nullptr_t
    Definition: platform.h:317
    +
    void swap(unique_ptr< T, Deleter > &lhs, unique_ptr< T, Deleter > &rhs) noexcept
    Specializes the swap algorithm.
    Definition: platform.h:795
    +
    Helper for std::is_pointer (false specialization)
    Definition: platform.h:468
    +
    Deleter deleter_type
    Definition: platform.h:734
    +
    T type
    Definition: platform.h:369
    +
    value_t val
    Definition: platform.h:578
    +
    T type
    Definition: platform.h:344
    +
    T * pointer
    Definition: platform.h:732
    +
    std::less
    Definition: platform.h:181
    +
    std::is_same (false specialization)
    Definition: platform.h:412
    +
    std::is_pointer
    Definition: platform.h:476
    +
    value_t value_type
    Definition: platform.h:281
    +
    CUTLASS_HOST_DEVICE std::pair< T1, T2 > make_pair(T1 t, T2 u)
    Definition: platform.h:250
    +
    unique_ptr()
    Definition: platform.h:744
    +
    std::greater
    Definition: platform.h:189
    +
    CUTLASS_HOST_DEVICE constexpr bool operator==(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:219
    +
    std::is_void
    Definition: platform.h:480
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:245
    +
    pointer operator->() const noexcept
    Returns a pointer to the managed object.
    Definition: platform.h:787
    +
    T & operator[](size_t i) const
    Array access to managed object.
    Definition: platform.h:790
    +
    CUTLASS_HOST_DEVICE constexpr bool operator>(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:240
    +
    void operator()(T *ptr) const
    Definition: platform.h:725
    +
    Default deleter.
    Definition: platform.h:718
    +
    Definition: platform.h:582
    +
    CUTLASS_HOST_DEVICE constexpr bool operator!=(const pair< T1, T2 > &lhs, const pair< T1, T2 > &rhs)
    Definition: platform.h:224
    +
    std::unique_ptr
    Definition: platform.h:730
    +
    Definition: platform.h:577
    +
    std::is_floating_point
    Definition: platform.h:516
    + +
    integral_constant< bool, false > false_type
    The type used as a compile-time boolean with false value.
    Definition: platform.h:300
    +
    Deleter const & get_deleter() const noexcept
    Returns the deleter object.
    Definition: platform.h:778
    +
    std::remove_cv
    Definition: platform.h:392
    +
    CUTLASS_HOST_DEVICE const value_type operator()() const
    Definition: platform.h:286
    +
    ~unique_ptr()
    Definition: platform.h:747
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:190
    +
    struct __align__(1) aligned_chunk< 1 >
    Definition: platform.h:651
    +
    T type
    Definition: platform.h:375
    +
    T type
    Definition: platform.h:381
    +
    std::is_integral
    Definition: platform.h:484
    + + +
    integral_constant< value_t, V > type
    Definition: platform.h:282
    +
    std::is_arithmetic
    Definition: platform.h:523
    +
    char byte
    Definition: platform.h:579
    +
    std::integral_constant
    Definition: platform.h:274
    +
    std::is_base_of
    Definition: platform.h:440
    +
    T type
    Definition: platform.h:334
    +
    #define nullptr
    nullptr
    Definition: platform.h:136
    +
    std::is_volatile
    Definition: platform.h:462
    +
    std::is_fundamental
    Definition: platform.h:528
    +
    platform::plus
    Definition: platform.h:175
    +
    std::enable_if (true specialization)
    Definition: platform.h:333
    +
    integral_constant< bool, true > true_type
    The type used as a compile-time boolean with true value.
    Definition: platform.h:297
    +
    void operator()(T *ptr) const
    Definition: platform.h:719
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    T element_type
    Definition: platform.h:733
    +
    Deleter & get_deleter() noexcept
    Returns the deleter object.
    Definition: platform.h:775
    +
    std::alignment_of
    Definition: platform.h:576
    +
    CUTLASS_HOST_DEVICE constexpr const T & min(const T &a, const T &b)
    std::min
    Definition: platform.h:201
    +
    remove_volatile< typename remove_const< T >::type >::type type
    Definition: platform.h:393
    +
    std::conditional (true specialization)
    Definition: platform.h:343
    +
    #define noexcept
    noexcept, constexpr
    Definition: platform.h:126
    +
    void reset(pointer p=pointer()) noexcept
    Replaces the managed object, deleting the old object.
    Definition: platform.h:763
    +
    T & operator*() const
    Dereferences the unique_ptr.
    Definition: platform.h:784
    +
    Helper for std::is_base_of.
    Definition: platform.h:420
    +
    std::remove_const (non-const specialization)
    Definition: platform.h:368
    +
    CUTLASS_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:176
    +
    CUTLASS_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const
    Definition: platform.h:182
    +
    Definition: platform.h:649
    +
    static CUTLASS_HOST_DEVICE yes check(DerivedT *, T)
    + +
    void swap(unique_ptr &other) noexcept
    Swaps the managed objects with *this and another unique_ptr.
    Definition: platform.h:772
    +
    static const bool value
    Definition: platform.h:435
    +
    aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
    Definition: platform.h:706
    +
    std::aligned_storage
    Definition: platform.h:705
    +
    std::remove_volatile (non-volatile specialization)
    Definition: platform.h:380
    +
    unique_ptr(pointer p)
    Definition: platform.h:745
    +
    char(& yes)[1]
    Definition: platform.h:421
    +
    pointer release() noexcept
    Releases ownership of the managed object, if any.
    Definition: platform.h:756
    +
    Basic include for CUTLASS macros.
    +
    std::bool_constant
    Definition: platform.h:306
    +
    char(& no)[2]
    Definition: platform.h:422
    +
    + + + + diff --git a/docs/generated-html/predicate__vector_8h.html b/docs/generated-html/predicate__vector_8h.html new file mode 100644 index 0000000000..42e3f56f3e --- /dev/null +++ b/docs/generated-html/predicate__vector_8h.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: predicate_vector.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    predicate_vector.h File Reference
    +
    +
    + +

    Defines container classes and iterators for managing a statically sized vector of boolean predicates. +More...

    +
    #include <stdint.h>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/shape.h>
    +#include <cutlass/util/platform.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
     Statically sized array of bits implementing. More...
     
    class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator
     A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
     
    class  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Iterator
     An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
     
    struct  cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator
     Iterator that always returns true. More...
     
    struct  cutlass::TrivialPredicateTileAdapter
     Always returns true predicate. More...
     
    struct  cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    struct  cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >
     Adapter to enable random access to predicates via logical coordinate within a tile. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/predicate__vector_8h_source.html b/docs/generated-html/predicate__vector_8h_source.html new file mode 100644 index 0000000000..fed29ff10b --- /dev/null +++ b/docs/generated-html/predicate__vector_8h_source.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: predicate_vector.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    predicate_vector.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <stdint.h>
    32 
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/shape.h>
    35 
    36 #include <cutlass/util/platform.h>
    37 
    38 namespace cutlass {
    39 
    41 
    58 
    78 
    94 
    97 template <
    99  int kPredicates_,
    101  int kPredicatesPerByte_ = 4,
    103  int kPredicateStart_ = 0>
    106  static int const kPredicates = kPredicates_;
    107 
    109  static int const kPredicatesPerByte = kPredicatesPerByte_;
    110 
    112  static int const kPredicateStart = kPredicateStart_;
    113 
    114  // Make sure no one tries to put more than 8 bits in a byte :)
    115  static_assert(kPredicatesPerByte <= 8, "kPredicatesPerByte must fit within an actual byte");
    116  // Make sure the "offsetted" bits fit in one byte.
    118  "The offsetted predicates must fit within an actual byte.");
    119 
    121  typedef uint32_t Storage;
    122 
    125 
    127  static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage);
    128 
    129  private:
    130  //
    131  // Data members
    132  //
    133 
    135  Storage storageData[kWordCount];
    136 
    137  //
    138  // Methods
    139  //
    140 
    142  CUTLASS_HOST_DEVICE void computeStorageOffset(int &word, int &bit, int idx) const {
    144 
    145  int byte = (idx / kPredicatesPerByte);
    146  int bit_offset = (idx % kPredicatesPerByte);
    147 
    148  word = byte / sizeof(Storage);
    149  int byte_offset = (byte % sizeof(Storage));
    150 
    151  bit = byte_offset * 8 + bit_offset + kPredicateStart;
    152  }
    153 
    155  CUTLASS_HOST_DEVICE Storage &storage(int word) {
    156  CUTLASS_ASSERT(word < kWordCount);
    157  return storageData[word];
    158  }
    159 
    161  CUTLASS_HOST_DEVICE Storage const &storage(int word) const {
    162  CUTLASS_ASSERT(word < kWordCount);
    163  return storageData[word];
    164  }
    165 
    166  public:
    167  //
    168  // Iterator
    169  //
    170 
    178  PredicateVector const &vec_;
    179 
    181  int bit_;
    182 
    183  public:
    186  ConstIterator(ConstIterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
    187 
    190  ConstIterator(PredicateVector const &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
    191 
    195  ++bit_;
    196  return *this;
    197  }
    198 
    202  --bit_;
    203  return *this;
    204  }
    205 
    209  ConstIterator ret(*this);
    210  ret.bit_++;
    211  return ret;
    212  }
    213 
    217  ConstIterator ret(*this);
    218  ret.bit_--;
    219  return ret;
    220  }
    221 
    224  bool operator==(ConstIterator const &it) const { return bit_ == it.bit_; }
    225 
    228  bool operator!=(ConstIterator const &it) const { return bit_ != it.bit_; }
    229 
    232  bool operator*() const { return vec_[bit_]; }
    233  };
    234 
    240  class Iterator {
    242  PredicateVector &vec_;
    243 
    245  int bit_;
    246 
    247  public:
    250  Iterator(Iterator const &it) : vec_(it.vec_), bit_(it.bit_) {}
    251 
    254  Iterator(PredicateVector &_vec, int _start = 0) : vec_(_vec), bit_(_start) {}
    255 
    259  ++bit_;
    260  return *this;
    261  }
    262 
    266  --bit_;
    267  return *this;
    268  }
    269 
    273  Iterator ret(*this);
    274  ret.bit_++;
    275  return ret;
    276  }
    277 
    281  Iterator ret(*this);
    282  ret.bit_--;
    283  return ret;
    284  }
    285 
    288  bool operator==(Iterator const &it) const { return bit_ == it.bit_; }
    289 
    292  bool operator!=(Iterator const &it) const { return bit_ != it.bit_; }
    293 
    296  bool get() { return vec_[bit_]; }
    297 
    300  bool operator*() const { return vec_[bit_]; }
    301 
    304  void set(bool value = true) { vec_.set(bit_, value); }
    305  };
    306 
    312 
    315  TrivialIterator(Iterator const &it) {}
    316 
    320 
    323  TrivialIterator &operator++() { return *this; }
    324 
    327  TrivialIterator operator++(int) { return *this; }
    328 
    331  bool operator*() const { return true; }
    332  };
    333 
    334  public:
    335  //
    336  // Methods
    337  //
    338 
    340  CUTLASS_HOST_DEVICE PredicateVector(bool value = true) { fill(value); }
    341 
    343  CUTLASS_HOST_DEVICE void fill(bool value = true) {
    344  Storage item = (value ? ~Storage(0) : Storage(0));
    345 
    347  for (int i = 0; i < kWordCount; ++i) {
    348  storage(i) = item;
    349  }
    350  }
    351 
    353  CUTLASS_HOST_DEVICE bool operator[](int idx) const { return at(idx); }
    354 
    356  CUTLASS_HOST_DEVICE bool at(int idx) const {
    357  int bit, word;
    358  computeStorageOffset(word, bit, idx);
    359 
    360  return ((storage(word) >> bit) & 1);
    361  }
    362 
    364  CUTLASS_HOST_DEVICE void set(int idx, bool value = true) {
    365  int bit, word;
    366  computeStorageOffset(word, bit, idx);
    367 
    368  Storage disable_mask = (~(Storage(1) << bit));
    369  Storage enable_mask = (Storage(value) << bit);
    370 
    371  storage(word) = ((storage(word) & disable_mask) | enable_mask);
    372  }
    373 
    377  for (int i = 0; i < kWordCount; ++i) {
    378  storage(i) = (storage(i) & predicates.storage(i));
    379  }
    380  return *this;
    381  }
    382 
    386  for (int i = 0; i < kWordCount; ++i) {
    387  storage(i) = (storage(i) | predicates.storage(i));
    388  }
    389  return *this;
    390  }
    391 
    394  Storage mask(0);
    395  for (int byte = 0; byte < sizeof(Storage); ++byte) {
    396  Storage byte_mask = (((1 << kPredicatesPerByte) - 1) << kPredicateStart);
    397  mask |= (byte_mask << (byte * 8));
    398  }
    399  uint32_t result = 0;
    400  for (int word = 0; word < kWordCount; ++word) {
    401  result |= storage(word);
    402  }
    403  return result == 0;
    404  }
    405 
    407  CUTLASS_DEVICE
    408  Iterator begin() { return Iterator(*this); }
    409 
    411  CUTLASS_DEVICE
    412  Iterator end() { return Iterator(*this, kPredicates); }
    413 
    415  CUTLASS_DEVICE
    416  ConstIterator const_begin() const { return ConstIterator(*this); }
    417 
    419  CUTLASS_DEVICE
    420  ConstIterator const_end() const { return ConstIterator(*this, kPredicates); }
    421 };
    422 
    424 
    429 
    431  CUTLASS_HOST_DEVICE bool at(int, int, int, int) const { return true; }
    432 };
    433 
    435 
    437 template <typename PredicateVector_, typename Iterations_>
    440  typedef PredicateVector_ PredicateVector;
    442  typedef Iterations_ Iterations;
    443 
    444  private:
    446  PredicateVector &predicates;
    447 
    448  public:
    450  CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_) : predicates(predicates_) {}
    451 
    453  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
    454  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    455  return predicates.at(bit);
    456  }
    457 
    459  CUTLASS_DEVICE void set(int d, int h, int w, int c, bool value) {
    460  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    461  predicates.set(bit, value);
    462  }
    463 };
    464 
    466 
    468 template <typename PredicateVector_, typename Iterations_>
    471  typedef PredicateVector_ PredicateVector;
    473  typedef Iterations_ Iterations;
    474 
    475  private:
    477  PredicateVector const &predicates;
    478 
    479  public:
    481  CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_)
    482  : predicates(predicates_) {}
    483 
    485  CUTLASS_DEVICE bool at(int d, int h, int w, int c) const {
    486  int const bit = ComputeOffsetFromShape<Iterations>::get(d, h, w, c);
    487  return predicates.at(bit);
    488  }
    489 };
    490 
    492 
    493 } // namespace cutlass
    CUTLASS_HOST_DEVICE Iterator(PredicateVector &_vec, int _start=0)
    Constructs an iterator from a PredicateVector.
    Definition: predicate_vector.h:254
    +
    CUTLASS_HOST_DEVICE bool operator!=(ConstIterator const &it) const
    Returns false if iterators point to the same bit.
    Definition: predicate_vector.h:228
    +
    CUTLASS_HOST_DEVICE PredicateVector & operator|=(PredicateVector const &predicates)
    Computes the union of two identical predicate vectors.
    Definition: predicate_vector.h:384
    +
    CUTLASS_HOST_DEVICE TrivialIterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:323
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE bool is_zero() const
    Returns true if entire predicate array is zero.
    Definition: predicate_vector.h:393
    +
    uint32_t Storage
    Storage type of individual elements.
    Definition: predicate_vector.h:115
    +
    CUTLASS_HOST_DEVICE TrivialIterator(PredicateVector const &_vec)
    Constructs an iterator from a PredicateVector.
    Definition: predicate_vector.h:319
    +
    CUTLASS_HOST_DEVICE ConstIterator & operator--()
    Pre-decrement.
    Definition: predicate_vector.h:201
    +
    static int const kBytes
    Number of bytes needed.
    Definition: predicate_vector.h:124
    +
    CUTLASS_DEVICE ConstIterator const_begin() const
    Returns a ConstIterator.
    Definition: predicate_vector.h:416
    +
    CUTLASS_HOST_DEVICE ConstIterator(PredicateVector const &_vec, int _start=0)
    Definition: predicate_vector.h:190
    +
    CUTLASS_HOST_DEVICE bool at(int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:356
    +
    CUTLASS_HOST_DEVICE ConstIterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:194
    +
    PredicateVector_ PredicateVector
    The vector of predicates.
    Definition: predicate_vector.h:440
    +
    static CUTLASS_DEVICE int get(int d, int h, int w, int c)
    Definition: shape.h:166
    +
    CUTLASS_HOST_DEVICE ConstIterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:208
    +
    CUTLASS_HOST_DEVICE Iterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:272
    +
    Adapter to enable random access to predicates via logical coordinate within a tile.
    Definition: predicate_vector.h:438
    +
    CUTLASS_HOST_DEVICE TrivialIterator(Iterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:315
    +
    C++ features that may be otherwise unimplemented for CUDA device functions.
    +
    Iterator that always returns true.
    Definition: predicate_vector.h:308
    +
    CUTLASS_HOST_DEVICE TrivialIterator operator++(int)
    Post-increment.
    Definition: predicate_vector.h:327
    +
    CUTLASS_HOST_DEVICE bool operator==(Iterator const &it) const
    Returns true if iterators point to the same bit.
    Definition: predicate_vector.h:288
    +
    CUTLASS_DEVICE PredicateTileAdapter(PredicateVector &predicates_)
    Ctor.
    Definition: predicate_vector.h:450
    +
    CUTLASS_DEVICE bool at(int d, int h, int w, int c) const
    Get the value at location (d, h, w, c).
    Definition: predicate_vector.h:453
    +
    #define CUTLASS_PRAGMA_UNROLL
    Definition: cutlass.h:60
    +
    CUTLASS_DEVICE bool at(int d, int h, int w, int c) const
    Get the value at location (d, h, w, c).
    Definition: predicate_vector.h:485
    +
    CUTLASS_HOST_DEVICE Iterator & operator--()
    Pre-decrement.
    Definition: predicate_vector.h:265
    +
    PredicateVector_ PredicateVector
    The vector of predicates.
    Definition: predicate_vector.h:471
    +
    CUTLASS_HOST_DEVICE PredicateVector & operator &=(PredicateVector const &predicates)
    Computes the intersection of two identical predicate vectors.
    Definition: predicate_vector.h:375
    +
    CUTLASS_HOST_DEVICE Iterator(Iterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:250
    +
    CUTLASS_HOST_DEVICE bool operator[](int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:353
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:300
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:331
    +
    CUTLASS_HOST_DEVICE void fill(bool value=true)
    Fills all predicates with a given value.
    Definition: predicate_vector.h:343
    +
    static int const kPredicates
    Number of bits stored by the PredicateVector.
    Definition: predicate_vector.h:106
    +
    CUTLASS_DEVICE Iterator end()
    Returns an iterator.
    Definition: predicate_vector.h:412
    +
    #define CUTLASS_ASSERT(x)
    Definition: cutlass.h:64
    +
    CUTLASS_HOST_DEVICE bool at(int, int, int, int) const
    The value at location (d, h, w, c).
    Definition: predicate_vector.h:431
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    static int const kPredicatesPerByte
    Number of bits stored within each byte of the predicate bit vector.
    Definition: predicate_vector.h:109
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    Statically sized array of bits implementing.
    Definition: predicate_vector.h:104
    +
    static int const kWordCount
    Number of storage elements needed.
    Definition: predicate_vector.h:127
    +
    CUTLASS_DEVICE ConstIterator const_end() const
    Returns a ConstIterator.
    Definition: predicate_vector.h:420
    +
    Always returns true predicate.
    Definition: predicate_vector.h:426
    +
    CUTLASS_HOST_DEVICE Iterator & operator++()
    Pre-increment.
    Definition: predicate_vector.h:258
    +
    A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to pred...
    Definition: predicate_vector.h:176
    +
    CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
    Set a bit within the predicate vector.
    Definition: predicate_vector.h:364
    +
    CUTLASS_HOST_DEVICE bool operator==(ConstIterator const &it) const
    Returns true if iterators point to the same bit.
    Definition: predicate_vector.h:224
    +
    Iterations_ Iterations
    The iterations.
    Definition: predicate_vector.h:473
    +
    Iterations_ Iterations
    The iterations.
    Definition: predicate_vector.h:442
    +
    CUTLASS_HOST_DEVICE bool operator*() const
    Dereferences iterator.
    Definition: predicate_vector.h:232
    +
    CUTLASS_HOST_DEVICE bool operator!=(Iterator const &it) const
    Returns false if iterators point to the same bit.
    Definition: predicate_vector.h:292
    +
    static int const kPredicateStart
    First bit within each byte containing predicates.
    Definition: predicate_vector.h:112
    +
    CUTLASS_HOST_DEVICE ConstIterator(ConstIterator const &it)
    Copy constructor.
    Definition: predicate_vector.h:186
    +
    CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter()
    Ctor.
    Definition: predicate_vector.h:428
    +
    CUTLASS_HOST_DEVICE ConstIterator operator--(int)
    Post-decrement.
    Definition: predicate_vector.h:216
    +
    Adapter to enable random access to predicates via logical coordinate within a tile.
    Definition: predicate_vector.h:469
    +
    CUTLASS_DEVICE ConstPredicateTileAdapter(PredicateVector const &predicates_)
    Ctor.
    Definition: predicate_vector.h:481
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    CUTLASS_HOST_DEVICE PredicateVector(bool value=true)
    Initialize the predicate vector.
    Definition: predicate_vector.h:340
    +
    CUTLASS_DEVICE Iterator begin()
    Returns an iterator to the start of the bit vector.
    Definition: predicate_vector.h:408
    +
    Basic include for CUTLASS macros.
    +
    An iterator implementing Predicate Iterator Concept enabling sequential read and write access to pred...
    Definition: predicate_vector.h:240
    +
    CUTLASS_HOST_DEVICE Iterator operator--(int)
    Post-decrement.
    Definition: predicate_vector.h:280
    +
    CUTLASS_HOST_DEVICE TrivialIterator()
    Constructor.
    Definition: predicate_vector.h:311
    +
    + + + + diff --git a/docs/generated-html/reshape__tile_8h.html b/docs/generated-html/reshape__tile_8h.html new file mode 100644 index 0000000000..3712944a1b --- /dev/null +++ b/docs/generated-html/reshape__tile_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: reshape_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    reshape_tile.h File Reference
    +
    +
    + +

    Defines a type for restructuring a tile. +More...

    +
    #include <cutlass/shape.h>
    +
    +

    Go to the source code of this file.

    + + + + + + +

    +Classes

    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     
    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, true >
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/reshape__tile_8h_source.html b/docs/generated-html/reshape__tile_8h_source.html new file mode 100644 index 0000000000..bb7a117963 --- /dev/null +++ b/docs/generated-html/reshape__tile_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: reshape_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    reshape_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/shape.h>
    31 
    32 namespace cutlass {
    33 
    35 
    36 // The following functor reshapes a tile of data. The goal is to have at least kAccessSize in
    37 // the inner-most dimension. If the user respects that constraint, there is nothing to be done. If
    38 // that's not the case, this functor will correct that and "extract" the right number of elements
    39 // from the next dimension.
    40 
    41 template <typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    42 struct ReshapeTile {
    43  typedef Tile_ Tile;
    44 };
    45 
    46 template <typename Tile_, int kAccessSize_>
    48  // Make sure the W dimension of the tile is large enough.
    49  static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small");
    50  // Make sure the dimension can be divided by the number of scalars.
    51  static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported");
    52  // Collapse the W dimension.
    53  typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> Tile;
    54 };
    55 
    57 
    58 } // namespace cutlass
    Definition: convert.h:33
    + +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
    Definition: reshape_tile.h:49
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    + + + + diff --git a/docs/generated-html/search/all_0.html b/docs/generated-html/search/all_0.html new file mode 100644 index 0000000000..5125b94009 --- /dev/null +++ b/docs/generated-html/search/all_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_0.js b/docs/generated-html/search/all_0.js new file mode 100644 index 0000000000..0165dcec14 --- /dev/null +++ b/docs/generated-html/search/all_0.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['_5f_5falign_5f_5f',['__align__',['../namespacecutlass_1_1platform.html#ac9068e2d027ffdf5cd564deecc2cb9e8',1,'cutlass::platform::__align__(1) aligned_chunk< 1 >'],['../namespacecutlass_1_1platform.html#a0bcb016704ec57f9499e662ba6156f98',1,'cutlass::platform::__align__(2) aligned_chunk< 2 >'],['../namespacecutlass_1_1platform.html#a71be5af25eeffa4077777f919e67d8da',1,'cutlass::platform::__align__(4) aligned_chunk< 4 >'],['../namespacecutlass_1_1platform.html#a42440254a16d4b6b95b95cc3360ee372',1,'cutlass::platform::__align__(8) aligned_chunk< 8 >'],['../namespacecutlass_1_1platform.html#a91d5e970d6ebe619914f40a9510bdb1e',1,'cutlass::platform::__align__(16) aligned_chunk< 16 >'],['../namespacecutlass_1_1platform.html#a210f4d360b1f9c3d074e71129fe4c0d9',1,'cutlass::platform::__align__(32) aligned_chunk< 32 >'],['../namespacecutlass_1_1platform.html#ae792b1c7ada1a33e306cd552f583bdce',1,'cutlass::platform::__align__(64) aligned_chunk< 64 >'],['../namespacecutlass_1_1platform.html#a5712ec4fed335a9b7f863fb3abe3c5eb',1,'cutlass::platform::__align__(128) aligned_chunk< 128 >'],['../namespacecutlass_1_1platform.html#a595cc98db29fb4d59772d2e2f52e347a',1,'cutlass::platform::__align__(256) aligned_chunk< 256 >'],['../namespacecutlass_1_1platform.html#ae70bb5d14a66500b47d2e3f83063d4a5',1,'cutlass::platform::__align__(512) aligned_chunk< 512 >'],['../namespacecutlass_1_1platform.html#a181e44e9c66f704175590727aaa9e5a1',1,'cutlass::platform::__align__(1024) aligned_chunk< 1024 >'],['../namespacecutlass_1_1platform.html#ae72c8fa997bb251d4140dceb03147154',1,'cutlass::platform::__align__(2048) aligned_chunk< 2048 >'],['../namespacecutlass_1_1platform.html#ada29683f1b408ae7b73cc8fbe2108628',1,'cutlass::platform::__align__(4096) aligned_chunk< 4096 
>'],['../namespacecutlass.html#ae6ee3d9361526f859d737d9c68c13706',1,'cutlass::__align__(1) AlignedStruct< 1 >'],['../namespacecutlass.html#a602227fad962270da185209ecc6012f2',1,'cutlass::__align__(2) AlignedStruct< 2 >'],['../namespacecutlass.html#a266d7d2ae6e79537e46ee37b4fdface7',1,'cutlass::__align__(4) AlignedStruct< 4 >'],['../namespacecutlass.html#a1101e01215ddb0e5a7b120a4541a3c4e',1,'cutlass::__align__(8) AlignedStruct< 8 >'],['../namespacecutlass.html#aa4071cf5103f352a5100d9b4bba895e2',1,'cutlass::__align__(16) AlignedStruct< 16 >'],['../namespacecutlass.html#ada65694bdd4b70d4c9d769a536275a47',1,'cutlass::__align__(32) AlignedStruct< 32 >'],['../namespacecutlass.html#aa80a7cb3febd19b96f2ecbcb610b1b9e',1,'cutlass::__align__(64) AlignedStruct< 64 >']]], + ['_5f_5fnv_5fstd_5fmax',['__NV_STD_MAX',['../platform_8h.html#abd31f291635329bc15292954f1f01d38',1,'platform.h']]], + ['_5f_5fnv_5fstd_5fmin',['__NV_STD_MIN',['../platform_8h.html#a39e234a3e3b0018b58df720bcb143420',1,'platform.h']]], + ['_5f_5fplatform_5fcat',['__platform_cat',['../platform_8h.html#aece7fe71be5aaf8d12dc9e2372f97de4',1,'platform.h']]], + ['_5f_5fplatform_5fcat_5f',['__platform_cat_',['../platform_8h.html#acd148999a5caeba8f6fd52e7e288e659',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/all_1.html b/docs/generated-html/search/all_1.html new file mode 100644 index 0000000000..b8ff871118 --- /dev/null +++ b/docs/generated-html/search/all_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_1.js b/docs/generated-html/search/all_1.js new file mode 100644 index 0000000000..b1bf99160e --- /dev/null +++ b/docs/generated-html/search/all_1.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['accesstype',['AccessType',['../structcutlass_1_1FragmentIterator.html#a012c5af3a8a40843c576c55ecbc663e7',1,'cutlass::FragmentIterator::AccessType()'],['../structcutlass_1_1FragmentConstIterator.html#addf5c21444f129211eefe7cdca6dfa1b',1,'cutlass::FragmentConstIterator::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a0b656c41b9fff6402f33e95204ce8860',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a7eccab04c8d3968e74486d0525a3fa02',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#abca5165caae7304f33fcad267c16b002',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a87d46956aa317f06f2ba9a535fdfc5da',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1Load.html#ad0bf2da0c240f3a2a3f4c92162d347ae',1,'cutlass::Load::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a5d7ed0abaeea99ec3399f8eea930f761',1,'cutlass::Load< Scalar_, 
Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a2b9faed8d92f55a46e313d79d214316d',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a8611550c045d6def964d9dafb2be80c6',1,'cutlass::Load< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a942970f88e13c88f496a9da67ed47a6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store.html#a8d2f927b2b61987dcea40e84f4575942',1,'cutlass::Store::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a89f329ba11f96ee3ce4428cbc792ac3d',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#ac0af6ae18137156abe24d6479232b955',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ad073f5e8252ad24b086f14bd2a109cf9',1,'cutlass::Store< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aeb70e4859e2795b6af63ad5e203b4da9',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1TileIteratorBase.html#abb3dde23971ad35a477b75ee99381b53',1,'cutlass::TileIteratorBase::AccessType()'],['../structcutlass_1_1TileLoadIterator.html#a4af8eeabe7c1ec0362782687a84466e0',1,'cutlass::TileLoadIterator::AccessType()'],['../structcutlass_1_1TileStoreIterator.html#a0e79ed59263ebc3478c43f2f9a50cb5a',1,'cutlass::TileStoreIterator::AccessType()']]], + 
['accumulators',['Accumulators',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#afe6bebd94e3379c94054d04c5196edce',1,'cutlass::gemm::GemmEpilogue::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af7ff579ccb4269bfa5e9ae297260f7a2',1,'cutlass::gemm::GemmEpilogueTraits::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a2fadb0ad2e28109ccfa9195e817a4d54',1,'cutlass::gemm::GemmConfig::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a505306c2af2059f6e84ba32d701d1602',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a4712650b46b6183ea60d79ef18f55b86',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a760a5262f419b789540e7bbb2fda4b9d',1,'cutlass::gemm::ThreadMultiplyAdd::Accumulators()']]], + ['accumulatorsperthread',['AccumulatorsPerThread',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a98d0f84730551eaabfe7404b36478b50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a47807c9c9fb43e7f7b5f409a49986c30',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a002b1944b25cc8fe0862f40a8c8555c5',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerThread()']]], + 
['accumulatorsperwarp',['AccumulatorsPerWarp',['../structcutlass_1_1gemm_1_1GemmConfig.html#a51d583dfcd645ad0ecfc23b87b3c5108',1,'cutlass::gemm::GemmConfig::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af0c856abdd9f7f26f671493cc629bf0a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a327ce1b7b6478c27c80baf5d9e26bdbc',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#aa83190df3c1639b6dd632cd4b9278d77',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerWarp()']]], + ['additive',['Additive',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375a77d7cc80ec0c3ff42ca9b2aff98a1646',1,'cutlass::Identity']]], + ['advance',['advance',['../classcutlass_1_1TensorRef.html#aab0dafb81a462320e55e0dc4a5886478',1,'cutlass::TensorRef']]], + ['aligned_5f',['aligned_',['../unioncutlass_1_1Vector.html#a9e9352594fcd022526d5b69b6c25c99c',1,'cutlass::Vector::aligned_()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a9e41dbe541a7dddf1e461e0390fe8896',1,'cutlass::Vector< half, kLanes_ >::aligned_()']]], + ['aligned_5fchunk',['aligned_chunk',['../structcutlass_1_1platform_1_1aligned__chunk.html',1,'cutlass::platform']]], + ['aligned_5fstorage',['aligned_storage',['../structcutlass_1_1platform_1_1aligned__storage.html',1,'cutlass::platform']]], + ['alignedstruct',['AlignedStruct',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignedstruct_3c_20kvectorsize_20_3e',['AlignedStruct< kVectorSize >',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + 
['alignment_5fof',['alignment_of',['../structcutlass_1_1platform_1_1alignment__of.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20value_5ft_20_3e',['alignment_of< const value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20volatile_20value_5ft_20_3e',['alignment_of< const volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double2_20_3e',['alignment_of< double2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double4_20_3e',['alignment_of< double4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20float4_20_3e',['alignment_of< float4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20int4_20_3e',['alignment_of< int4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20long4_20_3e',['alignment_of< long4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong2_20_3e',['alignment_of< longlong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong4_20_3e',['alignment_of< longlong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20uint4_20_3e',['alignment_of< uint4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulong4_20_3e',['alignment_of< ulong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html',1,'cutlass::platform']]], + 
['alignment_5fof_3c_20ulonglong2_20_3e',['alignment_of< ulonglong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong4_20_3e',['alignment_of< ulonglong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20volatile_20value_5ft_20_3e',['alignment_of< volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alpha',['alpha',['../structcutlass_1_1gemm_1_1GemmDesc.html#a053c2b529be527f510ee317737fbf7e8',1,'cutlass::gemm::GemmDesc::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a3248d6b3d9bcc59365d582b879292a70',1,'cutlass::gemm::LinearScaling::Params::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ab9c51c8b1f06e935a353ac5b1c22cee6',1,'cutlass::gemm::LinearScaling::alpha()']]], + ['at',['at',['../structcutlass_1_1Coord.html#ad10b59430927a354fcd874d2d32f1bd8',1,'cutlass::Coord::at()'],['../structcutlass_1_1Coord.html#ab511a16210d1b94449f5bc6476f6a266',1,'cutlass::Coord::at(int dim)'],['../structcutlass_1_1Coord.html#af9cc7ab2088544d1240ac51c4c6e685d',1,'cutlass::Coord::at() const'],['../structcutlass_1_1Coord.html#aed4f4d1c7c0749fe72736d7a1213b6e9',1,'cutlass::Coord::at(int dim) const'],['../structcutlass_1_1FragmentIterator.html#a9cf31df06ff035705a1341810fcdcbf2',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0) const'],['../structcutlass_1_1FragmentIterator.html#a7bdc407aae8d7360e089af347b585a53',1,'cutlass::FragmentIterator::at(int d, int h, int w, int 
c=0)'],['../structcutlass_1_1FragmentConstIterator.html#a8b957150545becacab1b8ead1be29424',1,'cutlass::FragmentConstIterator::at()'],['../structcutlass_1_1PredicateVector.html#ac8eca7087d1f7575b0c6beeb5f907bfd',1,'cutlass::PredicateVector::at()'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a3e41ab145489df08fca79251b2253d0f',1,'cutlass::TrivialPredicateTileAdapter::at()'],['../structcutlass_1_1PredicateTileAdapter.html#a7d54e877bca2e840c142293b4826e986',1,'cutlass::PredicateTileAdapter::at()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9e5651009a7b8df9960527c18c7b05dd',1,'cutlass::ConstPredicateTileAdapter::at()'],['../classcutlass_1_1TensorRef.html#a7eff42a37e4dbee488bfa726f3f0df4f',1,'cutlass::TensorRef::at(Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a5702dea703104ab431c098c7b039c215',1,'cutlass::TensorRef::at(int idx) const'],['../classcutlass_1_1TensorView.html#ad894a8b373c413d308cb1b7c7ba545ce',1,'cutlass::TensorView::at(Coord_t const &coord) const'],['../classcutlass_1_1TensorView.html#acc55581896fae8c0449b44b56d750155',1,'cutlass::TensorView::at(Offset_t idx) const']]] +]; diff --git a/docs/generated-html/search/all_10.html b/docs/generated-html/search/all_10.html new file mode 100644 index 0000000000..50bc449e12 --- /dev/null +++ b/docs/generated-html/search/all_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_10.js b/docs/generated-html/search/all_10.js new file mode 100644 index 0000000000..19828a38f9 --- /dev/null +++ b/docs/generated-html/search/all_10.js @@ -0,0 +1,20 @@ +var searchData= +[ + ['rank',['Rank',['../classcutlass_1_1TensorRef.html#a22ac53a60e63a743613e732586ad0c66',1,'cutlass::TensorRef::Rank()'],['../classcutlass_1_1TensorView.html#a22c39e8cf314884c5d523914cf4cac90',1,'cutlass::TensorView::Rank()']]], + ['ref',['ref',['../classcutlass_1_1TensorView.html#a8650860460ea24944c803a671095be09',1,'cutlass::TensorView::ref()'],['../classcutlass_1_1TensorView.html#a5cbff89d3d8dc71d27a4d6c1d7abb58a',1,'cutlass::TensorView::ref() const']]], + ['registers',['registers',['../unioncutlass_1_1Vector.html#a29dab07949206cc1609543ffcefd1e5a',1,'cutlass::Vector::registers()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#abd116dc7a5b82ac9b1481fb1d2bfc93f',1,'cutlass::Vector< half, kLanes_ >::registers()']]], + ['release',['release',['../classcutlass_1_1platform_1_1unique__ptr.html#a7ac06ebe7bc66573d3225891e12d2279',1,'cutlass::platform::unique_ptr']]], + ['remove_5fconst',['remove_const',['../structcutlass_1_1platform_1_1remove__const.html',1,'cutlass::platform']]], + ['remove_5fconst_3c_20const_20t_20_3e',['remove_const< const T >',['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['remove_5fcv',['remove_cv',['../structcutlass_1_1platform_1_1remove__cv.html',1,'cutlass::platform']]], + ['remove_5fvolatile',['remove_volatile',['../structcutlass_1_1platform_1_1remove__volatile.html',1,'cutlass::platform']]], + ['remove_5fvolatile_3c_20volatile_20t_20_3e',['remove_volatile< volatile T >',['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + 
['reset',['reset',['../classcutlass_1_1TensorRef.html#abefe392e81da2c09cb127f963ae90674',1,'cutlass::TensorRef::reset()'],['../classcutlass_1_1TensorView.html#a8b1785a1ea5d7aa7eba8e45297d539d3',1,'cutlass::TensorView::reset()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a6740f71511f5495d6038cf8878862331',1,'cutlass::platform::unique_ptr::reset()']]], + ['reshape_5ftile_2eh',['reshape_tile.h',['../reshape__tile_8h.html',1,'']]], + ['reshapethreads',['ReshapeThreads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html',1,'cutlass::gemm']]], + ['reshapethreads_3c_20tile_5f_2c_20threads_5f_2c_20true_20_3e',['ReshapeThreads< Tile_, Threads_, true >',['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html',1,'cutlass::gemm']]], + ['reshapetile',['ReshapeTile',['../structcutlass_1_1ReshapeTile.html',1,'cutlass']]], + ['reshapetile_3c_20tile_5f_2c_20kaccesssize_5f_2c_20true_20_3e',['ReshapeTile< Tile_, kAccessSize_, true >',['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html',1,'cutlass']]], + ['residue',['residue',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aae1adef6312e069e59a83d38c03116f9',1,'cutlass::gemm::GlobalLoadStreamBase::residue()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aab37ea6c47e34466371314ed3971dc7b',1,'cutlass::gemm::GemmGlobalIteratorAb::residue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a405b93680bb6e356369863244d0b56aa',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::residue()']]], + ['round_5fnearest',['round_nearest',['../namespacecutlass.html#a17c8c408d672d26f1c70d2435f6ac83e',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_11.html b/docs/generated-html/search/all_11.html new file mode 100644 index 0000000000..b35c8bf0e8 --- /dev/null +++ b/docs/generated-html/search/all_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_11.js b/docs/generated-html/search/all_11.js new file mode 100644 index 0000000000..4f0bed2ee5 --- /dev/null +++ b/docs/generated-html/search/all_11.js @@ -0,0 +1,89 @@ +var searchData= +[ + ['scalar',['Scalar',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0d38914bf97084e04102e7897aee4295',1,'cutlass::gemm::GemmEpilogue::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a006e50cf5fb67407d41c60d6d08b8b66',1,'cutlass::gemm::GemmEpilogueTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae2b82b9b62aefa15005091bb84ac20e8',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afbbf15a7b5e4c38e59bf1debf67f04d6',1,'cutlass::gemm::GlobalLoadStreamBase::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a6894b653fffa59bcb847bc3295643d6b',1,'cutlass::gemm::GemmGlobalTileTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a5817b81c7013db9a3f7394ad4b1db79a',1,'cutlass::gemm::GemmGlobalIteratorAb::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6b5b207eb1147e9669215e192901df9e',1,'cutlass::gemm::GemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a8b04fd003fc2db46d749360e8838438b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aaa439a0bb6b9de5e2722ea7b011effea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a1b6956adc65254202864520b668edd14',1,'cutlass::gemm::GemmSharedLoadTileATraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a2a6065e583155b3e389253d3bfb64d73',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9a2218b570dada2f1e3ccd8004c47856',1,'cutlass::gemm::GemmShar
edStoreTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1b025cb056729706f36469e74a9799dc',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af511f0ff83166b2a77d4cad4150c8e8f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ac618881d66790e4c280dc5692e5ddf95',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a8ae7db3f2f0c57779729d500386c004c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7639ccd7f6419a9f232db173a228e756',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ae4128bba3f1df6ef7824e2db79745b00',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html#ab1068ba72468f9ede1d05ba41ea31317',1,'cutlass::gemm::IgemmEpilogueScalar::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html#a0983fd25494f6a7ed5af37a02e99f650',1,'cutlass::gemm::IgemmEpilogueScalar< int 
>::Scalar()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ae6b053ca059932f7c0d3c99243854183',1,'cutlass::gemm::LinearScaling::Scalar()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab9979f3f1f6d31e1466780c5777de25e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1TileIteratorBase.html#a17163e93d7d3616b4950925f72bb4c16',1,'cutlass::TileIteratorBase::Scalar()'],['../structcutlass_1_1TileLoadIterator.html#ae8dff52e619f06fbdbca8cb847c79895',1,'cutlass::TileLoadIterator::Scalar()'],['../structcutlass_1_1TileStoreIterator.html#ad52318b430437575b55099ca992ca3a7',1,'cutlass::TileStoreIterator::Scalar()'],['../unioncutlass_1_1Vector.html#a56875d7cbf921261e68e1f63212db5bd',1,'cutlass::Vector::Scalar()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a03199df1287d263f7267239c014f1d9b',1,'cutlass::Vector< half, kLanes_ >::Scalar()'],['../structcutlass_1_1VectorTraits.html#ab3b49d7fb52050c13e50e3c75bf72599',1,'cutlass::VectorTraits::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aaf35570b10829356762dcec925a5b4bc',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a6e99dde8432b13472971dc41573a574e',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar()']]], + ['scalara',['ScalarA',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a6fa76b3e7ac721d47df47eba4e9ef222',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarA()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a366083b229b28e7f44da38273b2ab263',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::ScalarA()'],['../structcutlass_1_1gemm_1_1Gemm.html#a6fcf9daef57558e1bb932c6eba99721b',1,'cutlass::gemm::Gemm::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a9d1e4e364be8fd9de5e1199d93ad76aa',1,'cutlass::gemm::GemmConfig::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a96d64bdc48db4971798b620d6b49b3f6',1,'cutlass::gemm::GemmTraits::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a236a408791a38358cbadf19dd0e8ed9f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aeef5fa0437b4ce1c2e8ac4bc7e062b65',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a382242001b4c8e18ea5f2de724902217',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarA()']]], + ['scalarb',['ScalarB',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af4f5c4a79c447e5aaf313878eca022cb',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarB()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af52ec4b92a3e788169764014aebb85a1',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB()'],['../structcutlass_1_1gemm_1_1Gemm.html#ae6f11bb666c2c8510e99200a2c0fc2f4',1,'cutlass::gemm::Gemm::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#aa13d6f5e5ad907ef09c88ae49e6e8e9b',1,'cutlass::gemm::GemmConfig::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#aa0e8fd28f5247764dfb7843f7670c698',1,'cutlass::gemm::GemmTraits::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ac7557562de1108bf1abc10829c83e88f',1,'cutlass::gemm::ThreadMultiplyAdd< 
AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aaf9e4b8b16150a6ad826c228af2bf103',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a42d181e7f4d0d0a15e1c911d3498b767',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarB()']]], + ['scalarc',['ScalarC',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a92c1ffbfb479cd9fa2c2632ef8e347d3',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarC()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af553be8ef0b4dc9bb593d98dfce8628d',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC()'],['../structcutlass_1_1gemm_1_1Gemm.html#a71f0c91768a1a87e94030c8c2db51e55',1,'cutlass::gemm::Gemm::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#abb0741601652df8fdf927d49c2c0e4d0',1,'cutlass::gemm::GemmEpilogue::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#abf97949c238d72854225c1c6131b5cbc',1,'cutlass::gemm::GemmEpilogueTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#ad8f262d7da093d07cdd5c6a4fd9aceea',1,'cutlass::gemm::GemmConfig::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a8f78d4a68817760099081523aa7fd443',1,'cutlass::gemm::GemmTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af1a6d91d4734683ea791bf57f3c3bbb0',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#acdd554e996a712ff62eb70d6ecf8e116',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, 
int8_t, int >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a1af758cb98c33060462a2706856b0a01',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarC()']]], + ['scalard',['ScalarD',['../structcutlass_1_1gemm_1_1Gemm.html#ae2aa3663f9f6f5708e816dcf7cd66694',1,'cutlass::gemm::Gemm::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4887b56a96694ce6350db77f78bb505f',1,'cutlass::gemm::GemmEpilogue::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a1ee74d6f89b044578e1cd6dd210ce5fe',1,'cutlass::gemm::GemmEpilogueTraits::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a188ef7f4c49ff2830753218343a1b8f8',1,'cutlass::gemm::GemmConfig::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a3129be75ee087603170f8367e10e070e',1,'cutlass::gemm::GemmTraits::ScalarD()']]], + ['scalarepilogue',['ScalarEpilogue',['../structcutlass_1_1gemm_1_1Gemm.html#a9349fc5f20215c1c6508e250b0b4e936',1,'cutlass::gemm::Gemm']]], + ['scalars',['scalars',['../unioncutlass_1_1Vector.html#a091080b4e9db9e89734f44ceb985d78f',1,'cutlass::Vector::scalars()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab4a119a4813f80aa10c25e32f8b115f3',1,'cutlass::Vector< half, kLanes_ >::scalars()']]], + ['set',['set',['../classcutlass_1_1PredicateVector_1_1Iterator.html#aadfd039b5622098c9e46706a27122575',1,'cutlass::PredicateVector::Iterator::set()'],['../structcutlass_1_1PredicateVector.html#a062fa8a8df725ef08ced2ffcca8336af',1,'cutlass::PredicateVector::set()'],['../structcutlass_1_1PredicateTileAdapter.html#aeda47efdda0387f9c3c7b31f836afca5',1,'cutlass::PredicateTileAdapter::set()']]], + ['sgemm_5ftraits_2eh',['sgemm_traits.h',['../sgemm__traits_8h.html',1,'']]], + ['sgemmconfig',['SgemmConfig',['../structcutlass_1_1gemm_1_1SgemmConfig.html',1,'cutlass::gemm']]], + ['sgemmtraits',['SgemmTraits',['../structcutlass_1_1gemm_1_1SgemmTraits.html',1,'cutlass::gemm']]], + ['shape',['Shape',['../structcutlass_1_1Shape.html',1,'cutlass::Shape< kD_, kH_, kW_, kC_ 
>'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a89f1d9599b418c8bb81c104ca86cf00e',1,'cutlass::gemm::GemmMultiplicandTraits::Shape()'],['../structcutlass_1_1ShapeScale.html#aae9cfc35c517cd89018e4f914acbac29',1,'cutlass::ShapeScale::Shape()'],['../structcutlass_1_1ShapeAdd.html#ad4712a1339445038949445de1dd74e71',1,'cutlass::ShapeAdd::Shape()'],['../structcutlass_1_1ShapeSub.html#a24b6dd8cb6171b85c4e2f37407f9a5c9',1,'cutlass::ShapeSub::Shape()'],['../structcutlass_1_1ShapeMul.html#a8875fc5e861339f981360ed774e8cc94',1,'cutlass::ShapeMul::Shape()'],['../structcutlass_1_1ShapeDiv.html#a108ded386ef6708afc6fe769a77a234b',1,'cutlass::ShapeDiv::Shape()'],['../structcutlass_1_1ShapeMax.html#ad566aceac2563024982eeabb78c6c961',1,'cutlass::ShapeMax::Shape()'],['../structcutlass_1_1ShapeMin.html#a5c813e4c34ea612431d31b36120f8549',1,'cutlass::ShapeMin::Shape()'],['../structcutlass_1_1ShapeStrides.html#ac6fcda9b8e1782f24c1e6d67cd880a6a',1,'cutlass::ShapeStrides::Shape()']]], + ['shape_2eh',['shape.h',['../shape_8h.html',1,'']]], + ['shapeadd',['ShapeAdd',['../structcutlass_1_1ShapeAdd.html',1,'cutlass']]], + ['shapecount',['ShapeCount',['../structcutlass_1_1ShapeCount.html',1,'cutlass']]], + ['shapediv',['ShapeDiv',['../structcutlass_1_1ShapeDiv.html',1,'cutlass']]], + ['shapemax',['ShapeMax',['../structcutlass_1_1ShapeMax.html',1,'cutlass']]], + ['shapemin',['ShapeMin',['../structcutlass_1_1ShapeMin.html',1,'cutlass']]], + ['shapemul',['ShapeMul',['../structcutlass_1_1ShapeMul.html',1,'cutlass']]], + ['shapescale',['ShapeScale',['../structcutlass_1_1ShapeScale.html',1,'cutlass']]], + ['shapestrides',['ShapeStrides',['../structcutlass_1_1ShapeStrides.html',1,'cutlass']]], + ['shapesub',['ShapeSub',['../structcutlass_1_1ShapeSub.html',1,'cutlass']]], + ['shared',['shared',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#afabd328b106d45b156200f73942d211e',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + 
['shared_5fiterator_5fload',['shared_iterator_load',['../namespacecutlass.html#abcec976c59cab75ca55b338d125154a3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#aa9416026c6db08d92a34c2ac08fea8c3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d)']]], + ['shared_5fiterator_5fstore',['shared_iterator_store',['../namespacecutlass.html#a705c6d75513e112d2731d1c40f4cf109',1,'cutlass']]], + ['shared_5fload_5ffence',['shared_load_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9b5e42f222fec98ff479bc1650221b84',1,'cutlass::gemm::GemmEpilogue::shared_load_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a475463c1e3af71598e22da8956900ebe',1,'cutlass::gemm::GemmTraits::shared_load_fence()']]], + ['shared_5fload_5fiterator_5fd',['shared_load_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a1742e43c128665f0ca39cb578291df81',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstorage',['shared_storage',['../structcutlass_1_1gemm_1_1Gemm.html#a6b0119ed8d92698dab4de68987c8cc1b',1,'cutlass::gemm::Gemm::shared_storage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a442b5b5688cd658c3b3476650c00281e',1,'cutlass::gemm::GemmEpilogue::shared_storage()']]], + ['shared_5fstore_5ffence',['shared_store_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac1b2a16b4ccf3e9617faf4d8a2c43691',1,'cutlass::gemm::GemmEpilogue::shared_store_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ac3c840a3d90c0da43301761af83c2c9f',1,'cutlass::gemm::GemmTraits::shared_store_fence()']]], + ['shared_5fstore_5fiterator_5fd',['shared_store_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#af79a0c74a4c30ccec59b393721b5dfc1',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + 
['shared_5fstream',['shared_stream',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html#ae63b5a52106dbd37ea304196335ec210',1,'cutlass::gemm::GemmEpilogueTraits::SharedStorage']]], + ['shared_5fstream_5fa',['shared_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aa9937ec51d18aad02398d95095117978',1,'cutlass::gemm::GemmTraits::Params']]], + ['shared_5fstream_5fb',['shared_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a78f22007632937bbd5f3dab7b097477d',1,'cutlass::gemm::GemmTraits::Params']]], + ['sharedloaditeratora',['SharedLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a365aed4c0e2ad1bffea517ee36998557',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1bbb198a50b5f01a0502df44bb678620',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#aa93043ac87d89ce7fb991c9195c3bf99',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorA()']]], + ['sharedloaditeratorb',['SharedLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a4de905aadc734df69fd0db83f01be56e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8d09409973094ca2a17633776a64a303',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a42322b9b10e894fe157e527b378c59f8',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorB()']]], + 
['sharedloaditeratord',['SharedLoadIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4a0b439f8a57d8e67174ecbd96183070',1,'cutlass::gemm::GemmEpilogue::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a9822fa405b32cc2f471c9fdd37585cb5',1,'cutlass::gemm::GemmEpilogueTraits::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#adbff60de6f90ef4d5ae0c7096692e2c0',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad33ee44527a7fcfd41b4e677927fd4fa',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadIteratorD()']]], + ['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6e097738679436d580e8dc6ac70efaad',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a93e9bcdca4ceb68754fb1f73e2b25d25',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream(Params const &params, SharedStorage &shared_storage)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a49315aea1c54d84ff19b0ac215128b95',1,'cutlass::gemm::GemmTraits::SharedLoadStream::SharedLoadStream()']]], + 
['sharedloadstreama',['SharedLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae01371eb31b88fa83c4926564cecafdc',1,'cutlass::gemm::GemmTraits::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aa5ebe3a857b55412a86ec65ad1c55dd8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a21c860cc877df13d22dd30eeb5e2b06b',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a70063eb7e19921efef55a6f32562773f',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamA()']]], + ['sharedloadstreamb',['SharedLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#acaeb27063a444e2a3b93f3cb70e3c290',1,'cutlass::gemm::GemmTraits::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a12447ce4d11601a625662f9d177cc3d8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ac5eeca1e91f0e0d4dd48d432d5213215',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a54e8ad5874306a3764951a9791f02c96',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamB()']]], + ['sharedloadtiletraits',['SharedLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ab8ba28fd1da48fcabbafc0de91281b46',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af534fc5698513af3c6724b68ae03316d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a1125408805bc697755f2b16594c6c8e1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a118bb34a6f58c3e5a989773b4b597d8c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a9335aca8b152ff1167763de8ff8fb882',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a458cbcc16fc296d024f2a1a95fb926c1',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af1bc7f7c26db3399201cd95f35a56790',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a851113bffb5b656c5c649845852b3b8d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadTileTraits()']]], + ['sharedloadtransformerd',['SharedLoadTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a132cabbc1402c87c7b35dea427001a13',1,'cutlass::gemm::GemmEpilogue']]], + ['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::SharedStorage'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1Gemm.html#ad10627d508fad0efae1fb91b26d7a6b7',1,'cutlass::gemm::Gemm::SharedStorage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac36dad8a7b6bc7fc6ef88e44068468dc',1,'cutlass::gemm::GemmEpilogue::SharedStorage()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a22c671494d487511c71f2b0f26fdb404',1,'cutlass::gemm::SharedLoadStream::SharedStorage()'],['../structcutlass_1_1TileLoadIterator.html#ab457bd7953af9ef418510f55f52d1f39',1,'cutlass::TileLoadIterator::SharedStorage()'],['../structcutlass_1_1TileStoreIterator.html#ab7922305d47b67e6cfb439e4e8d9f09b',1,'cutlass::TileStoreIterator::SharedStorage()']]], + ['sharedstorefragmentd',['SharedStoreFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5e64440830b36899f9c0ed8b369665c8',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['sharedstoreiteratora',['SharedStoreIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a3a20852daeb46c625b2391d078b30d73',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7f022d423d42d4081cefa7eb26b4d5b4',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ae187303a8da63f36960687a4730f4c46',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorA()']]], + 
['sharedstoreiteratorb',['SharedStoreIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a43713f534798b1e27c4ba38b72e63c08',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abe3383e7338c08841fd8f0bfb1090448',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a4d6658f3a3b53760b10a3da9c807b81f',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorB()']]], + ['sharedstoreiteratord',['SharedStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aab0a964efe223c5c29bc816c393b5a9a',1,'cutlass::gemm::GemmEpilogue::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a74f4beb86447f6b613e9b60234cb27bc',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a02a517fd246fb961727d3bd1b4f954be',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#af7024128202d642d3535e1ae5cf5f43d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreIteratorD()']]], + ['sharedstorestorage',['SharedStoreStorage',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a69092e298d5723028fc24235d72f87fa',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['sharedstorestoragea',['SharedStoreStorageA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a8d49ad32fc9d8c14f6141690962c3f9c',1,'cutlass::gemm::GemmTraits']]], + ['sharedstorestorageb',['SharedStoreStorageB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a438b80cd8d8df0e74014ae47a162f7ed',1,'cutlass::gemm::GemmTraits']]], + 
['sharedstoretiletraits',['SharedStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a3a0fb3a914bfd009ff2e3918bcd231a9',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aaa198fed841af6bf26bf2e9544d0a877',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad6511b7c2d84a9f6c3ed3639269ac44f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1884cbc21987aec651fa8149d4ed1a06',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#acbeea56f0ce95ddd632db3482c1021e5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a2aad3b2454d956f20dac1bb0ad75a2f8',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ab1ae3d51f65f7af60147da1c51a7a0c2',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ 
>::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad7659dc0eaa491447ad127ef7098924f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a7624585480f83a46725c92b5dee20ebc',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aca6118b5bbe6f667f05c53bd52543045',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()']]], + ['sharedstoretransformerd',['SharedStoreTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9063e7fc044a679652d5a3a31aa77e7c',1,'cutlass::gemm::GemmEpilogue::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a0b8ac1972b2f2cff48070f8b862ed25c',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aa5cea8dbebda9a12a503ae1416c4da33',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a00000e0cd14b9e6e242eafb5133af8cf',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTransformerD()']]], + ['simplifiedgemmepiloguetraits',['SimplifiedGemmEpilogueTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits',['SimplifiedGemmTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits_3c_20klayouta_5f_2c_20klayoutb_5f_2c_20gemmconfig_5f_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20index_5f_20_3e',['SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ 
>',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraitshelper',['SimplifiedGemmTraitsHelper',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html',1,'cutlass::gemm']]], + ['size',['size',['../classcutlass_1_1TensorView.html#a541a7c22e7109d4059044f146fe69027',1,'cutlass::TensorView::size() const'],['../classcutlass_1_1TensorView.html#a6218d8555679966eab784a6bb1fa4ed1',1,'cutlass::TensorView::size(int dim) const']]], + ['skew',['Skew',['../structcutlass_1_1TileIteratorBase.html#ae89afbcf642b3023770ff22969c51d16',1,'cutlass::TileIteratorBase::Skew()'],['../structcutlass_1_1TileLoadIterator.html#a11ec4297c9a1352c8005ac222892b35c',1,'cutlass::TileLoadIterator::Skew()'],['../structcutlass_1_1TileStoreIterator.html#a57348779bb004ed1ea0fd9cc252e895d',1,'cutlass::TileStoreIterator::Skew()']]], + ['sqrt_5fest',['sqrt_est',['../structcutlass_1_1sqrt__est.html',1,'cutlass']]], + ['stage',['stage',['../structcutlass_1_1TileLoadIterator.html#aa3fd9859de68d76e07ebee06c6ccee92',1,'cutlass::TileLoadIterator::stage()'],['../structcutlass_1_1TileStoreIterator.html#ae435b72b15eca46eb871446d92bd316e',1,'cutlass::TileStoreIterator::stage()']]], + ['static_5fassert',['static_assert',['../platform_8h.html#adde4c9ea91b753491851361a4198c009',1,'platform.h']]], + ['storage',['Storage',['../structcutlass_1_1PredicateVector.html#afe85a07b9f311327c6bf04e3a5f94e5a',1,'cutlass::PredicateVector::Storage()'],['../classcutlass_1_1TensorRef.html#a604921388cb7ee18ddb8127b8ca2f7fd',1,'cutlass::TensorRef::Storage()'],['../structcutlass_1_1TileIteratorBase.html#a6ca47fd6e2f9cbb3498c138417ea414a',1,'cutlass::TileIteratorBase::Storage()']]], + ['storagetype',['StorageType',['../structcutlass_1_1StorageType.html',1,'cutlass']]], + ['storagetype_3c_201_20_3e',['StorageType< 1 >',['../structcutlass_1_1StorageType_3_011_01_4.html',1,'cutlass']]], + ['storagetype_3c_202_20_3e',['StorageType< 2 
>',['../structcutlass_1_1StorageType_3_012_01_4.html',1,'cutlass']]], + ['storagetype_3c_204_20_3e',['StorageType< 4 >',['../structcutlass_1_1StorageType_3_014_01_4.html',1,'cutlass']]], + ['store',['Store',['../structcutlass_1_1Store.html',1,'cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#a1f31090613c4e6f0895f598880d6c4e5',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#a118c78aa6b0ae0f0c78889689b6878c8',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a45319520b7d341c66bd54d3e8fec48f8',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1Store.html#a1117fa7b7bdeeb3a7f2d647a1d340aaf',1,'cutlass::Store::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a00f6bb93d318bf4cff35c9dabc630167',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a027980b8456243974b0c442866a66e3a',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::store()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ab70d04589637f285f861902f649f834e',1,'cutlass::Store< double, 2, Memory_, true, 16 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aa130564bb2eba7b07e1f183c98f1d9e2',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 
>::store()'],['../structcutlass_1_1TileStoreIterator.html#a53820de506cecb1f5fb07b3385d8272a',1,'cutlass::TileStoreIterator::store(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileStoreIterator.html#a60258b7c1a1708f97e28f8f6c292bfe4',1,'cutlass::TileStoreIterator::store(Fragment &fragment) const']]], + ['store_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['store_5fiterator',['store_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a3e5167fa3f2dc0d8b4b903bd4e936969',1,'cutlass::gemm::GlobalLoadStreamBase::Params::store_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a939e9ddecc5ee97882a54211a61f5586',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::store_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0eafd1e245946bd1b9d228ad7d2d0dae',1,'cutlass::gemm::GlobalLoadStreamBase::store_iterator()']]], + ['store_5fpost_5fincrement',['store_post_increment',['../structcutlass_1_1TileStoreIterator.html#a57aa2c36eb6ad9d2500c1f5396b3a526',1,'cutlass::TileStoreIterator::store_post_increment(Fragment 
&fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileStoreIterator.html#ae63949f58c1b32959bbfa5b64d521f0f',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment)']]], + ['storeiterator',['StoreIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a15eee5bf6367a36a5b5c8024437f4834',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['stream_5fa',['stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a62d3dcf5d97a0a896b2033e55dfb0811',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a82a59524b5d3134eb609d280193a5c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8e68561561ac6b08efbfd116903198c8',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_a()']]], + ['stream_5fb',['stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a0173fcc8856b17a52cc5eee845f101fa',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#acc287ce5e2f3635d9d55d91914d2d04c',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a1fdc6af44c14c88a94529d187fda176d',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_b()']]], + ['streamsharedstorage',['StreamSharedStorage',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, 
GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage']]], + ['streamsharedstorage_3c_20globalloadstreama_2c_20sharedloadstreama_20_3e',['StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['streamsharedstorage_3c_20globalloadstreamb_2c_20sharedloadstreamb_20_3e',['StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['stride',['stride',['../classcutlass_1_1TensorRef.html#a89380141d25528c4c7ba6c365b96a878',1,'cutlass::TensorRef::stride() const'],['../classcutlass_1_1TensorRef.html#af47f192552544272774a29d7a0829a31',1,'cutlass::TensorRef::stride(int dim) const'],['../classcutlass_1_1TensorView.html#a3ac125a25199fd91f73d2cfe9fc3d09b',1,'cutlass::TensorView::stride() const'],['../classcutlass_1_1TensorView.html#a522630bb0df977282a9bff17e6fee843',1,'cutlass::TensorView::stride(int dim) const']]], + ['stride_5fd',['stride_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad67234ec264354a22032bb2519575dc1',1,'cutlass::TileIteratorBase::Params']]], + ['stride_5fh',['stride_h',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#ae0fdc7426b22ff2c20f077e251ebc823',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a0c6b03c635e14ad4424a83f8c7f8025e',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5cff0436eed0fefa2957ad6d083ed007',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a58e8c883aea4cfdfa5a84c25a4704ebc',1,'cutlass::TileIteratorBase::Params::stride_h()']]], + 
['stride_5fw',['stride_w',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a565f6cab8925d632dcf24bd1974caca2',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_w()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a313984457c78eea66c980f6813047b9c',1,'cutlass::TileIteratorBase::Params::stride_w()']]], + ['strides',['Strides',['../structcutlass_1_1FragmentIterator.html#a2858ba9a8a9bbaef1de73415cff9b3c1',1,'cutlass::FragmentIterator']]], + ['subview',['subview',['../classcutlass_1_1TensorView.html#aee43c516397d7c06eb8012711d8d7c15',1,'cutlass::TensorView']]], + ['swap',['swap',['../classcutlass_1_1platform_1_1unique__ptr.html#a748d413c50bdbbe9e2f9986fbc423036',1,'cutlass::platform::unique_ptr::swap()'],['../namespacecutlass_1_1platform.html#a3e83320a39137d92042eb0bf93be9678',1,'cutlass::platform::swap()']]], + ['swizzle',['swizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#a0a366c072ee66bbcb390acd7b8bbe5f8',1,'cutlass::gemm::IdentityBlockSwizzle']]] +]; diff --git a/docs/generated-html/search/all_12.html b/docs/generated-html/search/all_12.html new file mode 100644 index 0000000000..fd265245b6 --- /dev/null +++ b/docs/generated-html/search/all_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_12.js b/docs/generated-html/search/all_12.js new file mode 100644 index 0000000000..cd80c9c193 --- /dev/null +++ b/docs/generated-html/search/all_12.js @@ -0,0 +1,54 @@ +var searchData= +[ + ['tensor_5fref_2eh',['tensor_ref.h',['../tensor__ref_8h.html',1,'']]], + ['tensor_5fview_2eh',['tensor_view.h',['../tensor__view_8h.html',1,'']]], + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html',1,'cutlass::TensorRef< Storage_, Rank_ >'],['../classcutlass_1_1TensorRef.html#a54f6edc293b0b8ac97f02e8ab951c478',1,'cutlass::TensorRef::TensorRef()'],['../classcutlass_1_1TensorRef.html#ae48325312183ff61dbd312c64f31fcb8',1,'cutlass::TensorRef::TensorRef(Storage *ptr, Coord< Rank > stride)']]], + ['tensorref_3c_20t_2c_204_20_3e',['TensorRef< T, 4 >',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorref_5ft',['TensorRef_t',['../classcutlass_1_1TensorView.html#a762fc3d887ab14f4c7bcde85f0af16ab',1,'cutlass::TensorView']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html',1,'cutlass::TensorView< T >'],['../classcutlass_1_1TensorView.html#a22401348796d603546e44d6c196018dc',1,'cutlass::TensorView::TensorView()'],['../classcutlass_1_1TensorView.html#a80480aa986a488a106a9b0aea331c317',1,'cutlass::TensorView::TensorView(TensorRef_t const &_ref, Coord_t const &_size)']]], + 
['this_5f',['This_',['../structcutlass_1_1Fragment.html#a32f7ff86b73576a15c5ddaa40c4e0a95',1,'cutlass::Fragment::This_()'],['../structcutlass_1_1FragmentIterator.html#ae320d9672450f5341abcdb24a8b09369',1,'cutlass::FragmentIterator::This_()'],['../structcutlass_1_1FragmentConstIterator.html#add14f695231c2bdd6284bf22b1e66f8f',1,'cutlass::FragmentConstIterator::This_()'],['../structcutlass_1_1gemm_1_1Gemm.html#a26c13e8bbad805760443ef6df475e317',1,'cutlass::gemm::Gemm::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2892be253a3de5bffc3edcef2890d3a8',1,'cutlass::gemm::GemmGlobalIteratorAb::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6a745d66c4c7de352041f779e54e6b2b',1,'cutlass::gemm::GemmGlobalIteratorCd::This_()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa8b453116c2d96ea2c56e08cb981346c',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::This_()']]], + ['thread_5fmultiply_5fadd_2eh',['thread_multiply_add.h',['../thread__multiply__add_8h.html',1,'']]], + ['thread_5foffset',['thread_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1864c5556529afdc8445021cad780b04',1,'cutlass::gemm::GemmGlobalIteratorAb::thread_offset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56601dc34e8f9a070db5dc48c37d55a0',1,'cutlass::gemm::GemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab3057dad7a4decb5594c66aa328f8066',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1TileLoadIterator.html#a7726cdd4fe056c59bb04adb9e5504457',1,'cutlass::TileLoadIterator::thread_offset()'],['../structcutlass_1_1TileStoreIterator.html#a350f5beea87d811f43c55519bc0b9035',1,'cutlass::TileStoreIterator::thread_offset()']]], + ['threadblocktile',['ThreadBlockTile',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a5e43f3c9aa8d7dc5f01dfc63b1ea97dc',1,'cutlass::gemm::GemmMultiplicandTraits']]], + 
['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#acec155117a56c942c5e695984b0f072d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a9b75e499f4c14369b5c86051dceeb81d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ab271a3f11ccde4b629ddb11b78c0d555',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadMultiplyAdd()']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20half_2c_20half_2c_20half_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html',1,'cutlass::gemm']]], + ['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ 
>::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html',1,'cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, 
kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afd09d3b8e5ca04eab7edc2e5723816e5',1,'cutlass::gemm::GemmGlobalIteratorAb::ThreadOffset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6d985f8e93be21e56f72ec1400d73df1',1,'cutlass::gemm::GemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a667cae4a9fa78a6df073f5ee48ef9664',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1TileTraits.html#af9c0fc178dac7f9dac8d254da34e04dd',1,'cutlass::TileTraits::ThreadOffset()'],['../structcutlass_1_1TileIteratorBase.html#a5abf4755aee07dc58b1d6183fbf4786f',1,'cutlass::TileIteratorBase::ThreadOffset()'],['../structcutlass_1_1TileLoadIterator.html#a8a1527b4b469ae1f97afde2502ece70d',1,'cutlass::TileLoadIterator::ThreadOffset()'],['../structcutlass_1_1TileStoreIterator.html#a6a6f51f459f98c0cddeacf476660cd27',1,'cutlass::TileStoreIterator::ThreadOffset()'],['../structcutlass_1_1TileTraitsStrideMajor.html#ae8d14a3c6871072febfd75ed08aba32c',1,'cutlass::TileTraitsStrideMajor::ThreadOffset()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a823ba83e9ca680da0af7d63be772a351',1,'cutlass::TileTraitsContiguousMajor::ThreadOffset()']]], + ['threads',['Threads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html#afd3614ff45f0fc77ad4967951cb5ab57',1,'cutlass::gemm::ReshapeThreads::Threads()'],['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html#a894932ad04fae3aea06eb6d259e01c1c',1,'cutlass::gemm::ReshapeThreads< Tile_, Threads_, true 
>::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a29bd05960cc541bb67098f5483c84cf6',1,'cutlass::gemm::GemmGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a9aff3e2ff0db5a5169257e964e5895c6',1,'cutlass::gemm::GemmGlobalTileCdTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a33e4dcd4449f324fed5ceaa2cde01b50',1,'cutlass::gemm::GemmGlobalIteratorAb::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#afdd08b4f4c1feaa426f997d15cd28c02',1,'cutlass::gemm::GemmGlobalIteratorCd::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a1acf2a1d8bf73fda142e7d82e05f00a2',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a9bef06b59f27c6e673066a7f0280aa06',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ae7a4f120805421ac0712604723612b7e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a5fd1a9f132c7aa0f68e129553f519d1e',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aeb866237318ac7983e554a08395c5125',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Threads()']]], + 
['threadsdelta',['ThreadsDelta',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a65f9ccd630dde0c9db5358cfc951583d',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#ae2f8331619e735e620f8a8cf2cdde077',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a6eee97f03dcea1c441116e143cf58018',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a2bb0f0820e52417ff77e7a2bdb9ed434',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadsDelta()']]], + ['threadshape',['ThreadShape',['../structcutlass_1_1TileTraitsStrideMajor.html#a03567f41ce616ebb4cdb309c85820599',1,'cutlass::TileTraitsStrideMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a33116b67e580292d4e354ca17ecd4167',1,'cutlass::TileTraitsContiguousMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad6619e0b5d876fafd51c78e39f2c029e',1,'cutlass::TileTraitsWarpRake::ThreadShape()']]], + 
['threadsperwarp',['ThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0761c497c41a45652368fc0d54def98f',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aed92656a074e915d97a1b6a990aeba66',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#adf72ea773b8d4d3eb184f59c8cdf9543',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9022ffc49b32503fd3639341e7e291a3',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa784f29ff453c1656fdea8270454fa55',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a5bc98fd196c1f1e4e3f1bfc621df4f50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ad2fbba0a70da29af27ed4578577abc5e',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadsPerWarp()']]], + ['threadsstrides',['ThreadsStrides',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae540e7ea7106552682aa4c97b833b3b1',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadsStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a2053e4b9cb3ed2727c89960354ea0b29',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadsStrides()']]], + 
['tile',['Tile',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aebbf8834d0d88f0e5b3e1926db5e6758',1,'cutlass::gemm::GemmGlobalTileTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ab96f324083e51ce4c2b73c18803c69a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a74196946c28e98ee60346b0eeede1471',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a9a00be672617162c4c7ac94c7d8980cc',1,'cutlass::gemm::GemmSharedLoadTileATraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac242508ec46db0493a69a589dbfc19e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a2bc41b907417b47f3dca9c3dd358f8bc',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a63f980fea1ff3dd83ac276cfd83a4ce5',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Tile()'],['../structcutlass_1_1ReshapeTile.html#a8d57fe6422aa920d9815a66e5a85b5f5',1,'cutlass::ReshapeTile::Tile()'],['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html#a966a9432cf42dfdff8ad6b89ebd74f06',1,'cutlass::ReshapeTile< Tile_, kAccessSize_, true 
>::Tile()'],['../structcutlass_1_1TileTraits.html#ab831be0adb255eece4f2e12fd9713831',1,'cutlass::TileTraits::Tile()'],['../structcutlass_1_1TileIteratorBase.html#a954ef18acc12d8256a7d4e37683f8c2c',1,'cutlass::TileIteratorBase::Tile()'],['../structcutlass_1_1TileLoadIterator.html#a7f1499ada284c21624487d4d3a5dbd10',1,'cutlass::TileLoadIterator::Tile()'],['../structcutlass_1_1TileStoreIterator.html#a8a87c8ef986e110a01a9226012594a61',1,'cutlass::TileStoreIterator::Tile()'],['../structcutlass_1_1TileTraitsStrideMajor.html#afbb78ece048b868475d4a6802e6894ac',1,'cutlass::TileTraitsStrideMajor::Tile()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a1607d53544302c12278793bc9b283763',1,'cutlass::TileTraitsContiguousMajor::Tile()'],['../structcutlass_1_1TileTraitsWarpRake.html#adcd658d9daf286368a9d51c8c1647f89',1,'cutlass::TileTraitsWarpRake::Tile()'],['../structcutlass_1_1TileTraitsStandard.html#aee3fee526bc4d4820c03665a2f5f166b',1,'cutlass::TileTraitsStandard::Tile()']]], + ['tile_5fiterator_2eh',['tile_iterator.h',['../tile__iterator_8h.html',1,'']]], + ['tile_20load_20iterator_20concept',['Tile Load Iterator Concept',['../group__tile__load__iterator__concept.html',1,'']]], + ['tile_20store_20iterator_20concept',['Tile Store Iterator Concept',['../group__tile__store__iterator__concept.html',1,'']]], + ['tile_20traits_20concept',['Tile Traits Concept',['../group__tile__traits__concept.html',1,'']]], + ['tile_5ftraits_5fstandard_2eh',['tile_traits_standard.h',['../tile__traits__standard_8h.html',1,'']]], + ['tiledthreadoffset',['TiledThreadOffset',['../structcutlass_1_1TiledThreadOffset.html',1,'cutlass']]], + ['tileiteratorbase',['TileIteratorBase',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20advance_5f_2c_20memoryspace_2c_20index_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratorfragment_3a_3akscalar_2c_20shape_3c_200_2c_200_2c_200_2c_200_20_3e_20_3e',['TileIteratorBase< 
TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratoradvance_3a_3akh_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >'],['../structcutlass_1_1TileLoadIterator.html#a81c9c0b17bf5f214230ecf10e0690a4e',1,'cutlass::TileLoadIterator::TileLoadIterator()'],['../structcutlass_1_1TileLoadIterator.html#a93e166575be3b2f7489833ae5da23f23',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileLoadIterator.html#a53282fa4cb33cfcec79033d26e418af6',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tileloaditerator_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20tiletraits_5f_3a_3amultiplicandtraits_3a_3akkstrided_20_3f_20iteratoradvance_3a_3akh_20_3aiteratoradvance_3a_3akw_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? 
IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >'],['../structcutlass_1_1TileStoreIterator.html#aac4d49854d63f632627b6974f9b59dbb',1,'cutlass::TileStoreIterator::TileStoreIterator()'],['../structcutlass_1_1TileStoreIterator.html#a037ccd942359e6bc8640a240b13cd330',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileStoreIterator.html#a4f89c5182659de94605300e15c3651b2',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tiletraits',['TileTraits',['../structcutlass_1_1TileTraits.html',1,'cutlass']]], + ['tiletraitscontiguousmajor',['TileTraitsContiguousMajor',['../structcutlass_1_1TileTraitsContiguousMajor.html',1,'cutlass']]], + ['tiletraitsstandard',['TileTraitsStandard',['../structcutlass_1_1TileTraitsStandard.html',1,'cutlass']]], + ['tiletraitsstridemajor',['TileTraitsStrideMajor',['../structcutlass_1_1TileTraitsStrideMajor.html',1,'cutlass']]], + ['tiletraitswarprake',['TileTraitsWarpRake',['../structcutlass_1_1TileTraitsWarpRake.html',1,'cutlass']]], + 
['tilewithoutskew',['TileWithoutSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a050cf5964a2d3683491bc4313ead5450',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a5a5a36fc570e1225b20ce0a48c89d213',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a1f35981a6d661635dfbcf7c7a76056a2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew()']]], + ['tilewithoutskew_5f',['TileWithoutSkew_',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a93ae99460695718babaef6d1ef597e38',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew_()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a3d8be9ddea1cab53d1b4b3d508f9eab8',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew_()']]], + ['tilewithskew',['TileWithSkew',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a72e0214f86cf8b3711d006dcd69d7a17',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a69c7ec2a779718556e6d9119588e791c',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithSkew()']]], + 
['traits',['Traits',['../structcutlass_1_1gemm_1_1Gemm.html#a29f52e33e1f1cf150f5062d9ad2590ff',1,'cutlass::gemm::Gemm::Traits()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a645ab6e9e63163ee6bf536717a30fb1b',1,'cutlass::gemm::GemmEpilogue::Traits()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af2b5682b8e6dd13590ec258a44636430',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Traits()'],['../structcutlass_1_1TileIteratorBase.html#ae7add0ee02bbec2c130ebaf608ab0696',1,'cutlass::TileIteratorBase::Traits()'],['../structcutlass_1_1TileLoadIterator.html#a7c6182031d9aa41d0e4a64516723e20a',1,'cutlass::TileLoadIterator::Traits()'],['../structcutlass_1_1TileStoreIterator.html#a6f50a8aec2d7045e9057b93df08172a8',1,'cutlass::TileStoreIterator::Traits()']]], + ['transform',['transform',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a4dd95354137d3cb52752ecdd346a5685',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#aa9fe67c947bf461ba3e3ca48daa34815',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1Copy.html#ab356f0f473aa3fd8df8fb8ddd8e0e9f3',1,'cutlass::Copy::transform(Fragment_ const &src, Fragment_ &dst)'],['../structcutlass_1_1Copy.html#a171f9a44c05b6fb432b0339979de4eb2',1,'cutlass::Copy::transform(InputFragment_ const &src, int offset, Fragment_ 
&dst)'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ad467ce744bf9d478900fb2661d7a1c26',1,'cutlass::gemm::HgemmSwizzle::transform()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a91ad48362b99a5f96ac1e92e95104f7b',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a819fd33db88a68521108bab2641d73fd',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#aca8a61e8eb1ab33b9c61e2e7d342379d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a89e078dbf376da872c3993ccbaf744d3',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a084917a512c7a411b76a69f86b906811',1,'cutlass::gemm::IgemmSwizzle::transform()']]], + ['transformed_5fa',['transformed_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a883b28ca237b1ec076856232cfee0c6f',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5fb',['transformed_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a9369a5f819d2a42997491e0df96f47ef',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5ffragment',['transformed_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afa97cb1cfebca0d6977b1c8318bedddf',1,'cutlass::gemm::GlobalLoadStreamBase']]], + 
['transformedfragment',['TransformedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afe7503a3304eefd633581d6bc73a0108',1,'cutlass::gemm::GlobalLoadStreamBase::TransformedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#aa2227d7fa1edef3f6730c7db41b132b4',1,'cutlass::gemm::SharedLoadStream::TransformedFragment()']]], + ['transformer',['transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a868f82ee87aba37b05721fe8210221c9',1,'cutlass::gemm::GlobalLoadStreamBase::transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#af846390ad0e5b80ccb4e8b95c5fe64a7',1,'cutlass::gemm::SharedLoadStream::transformer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aa24bd9f94bea04a148b49b2a97b63fbe',1,'cutlass::gemm::GlobalLoadStreamBase::Transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#ad1f70f0dd1027da1353ff7a38f524904',1,'cutlass::gemm::SharedLoadStream::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a882c10bed18f62ece97f5f20f9de3296',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a42c5bafcb226623b3326dbd01fc72f3b',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#aaaccb3f02a857e0c80d2891c6c6dcdb7',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#ae66bb2c1f87e19278ff471c32e71ea85',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ 
>::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html#a98aefa95117dbfdf2e577890318a6c13',1,'cutlass::gemm::IgemmGlobalStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html#a52ecdfd8b94d8d7f4881048e11a33aba',1,'cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html#ad3190650741cef20c1aca919eddd9d72',1,'cutlass::gemm::IgemmGlobalLoadTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html#a49c249026be24ec8a66f5eda99cb855c',1,'cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html#a9edd08d595327a8cc3b8da50622b3bd2',1,'cutlass::gemm::IgemmSharedStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a0b53e18f109ac0fd116e0d01ed6ec197',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a8a4e3ce1174789e2b695bda7b863079f',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a92320b7224a77a8af61e55beef30ad49',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a9728f71c2e7a6a649bd28d8c11241b0a',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()']]], + 
['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a6cb3664b5cba4280b7055a65ddad7850',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ada8cd3ac6db568bb9bf268ba2c3a3e14',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(Iterator const &it)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a3adf0440f9a0143a61b43d39c3f03721',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(PredicateVector const &_vec)']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html',1,'cutlass::TrivialPredicateTileAdapter'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a7259853a129a7e319b972d3b41dd59d7',1,'cutlass::TrivialPredicateTileAdapter::TrivialPredicateTileAdapter()']]], + ['true_5ftype',['true_type',['../namespacecutlass_1_1platform.html#a0eddc4a3921e137f31fd8014be96e807',1,'cutlass::platform']]], + ['type',['Type',['../structcutlass_1_1StorageType.html#a2b9c99ae52eb4962428f776efc1e7f06',1,'cutlass::StorageType::Type()'],['../structcutlass_1_1StorageType_3_014_01_4.html#aa6754c0eb530544a1457afe1ae94a807',1,'cutlass::StorageType< 4 >::Type()'],['../structcutlass_1_1StorageType_3_012_01_4.html#a66c52fe770774ea01c511aea1af1f8d4',1,'cutlass::StorageType< 2 >::Type()'],['../structcutlass_1_1StorageType_3_011_01_4.html#a4a70002785c378c1f180800f2a65bcd4',1,'cutlass::StorageType< 1 >::Type()'],['../structcutlass_1_1Vectorize.html#a070ec95f4297d769ee53a4d8a650c05e',1,'cutlass::Vectorize::Type()'],['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html#a79f147933e3f520145aee94ae18da3c5',1,'cutlass::Vectorize< Element_, 1 
>::Type()'],['../structcutlass_1_1platform_1_1integral__constant.html#af58810ccead8f16ed88cd6a4afdc6e52',1,'cutlass::platform::integral_constant::type()'],['../structcutlass_1_1platform_1_1enable__if.html#aff9c0f270020cf097addf77e53a5af99',1,'cutlass::platform::enable_if::type()'],['../structcutlass_1_1platform_1_1conditional.html#ab6484d0dd6449b5195c4e868026fed11',1,'cutlass::platform::conditional::type()'],['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html#a8d55f500f667de560650554e9c220644',1,'cutlass::platform::conditional< false, T, F >::type()'],['../structcutlass_1_1platform_1_1remove__const.html#ac3662947fa50251daf58240a9c798085',1,'cutlass::platform::remove_const::type()'],['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html#af68706cfaa6af14edc26ad5b974b47e3',1,'cutlass::platform::remove_const< const T >::type()'],['../structcutlass_1_1platform_1_1remove__volatile.html#a4f5b043d46206248d1bbbcf650707dd1',1,'cutlass::platform::remove_volatile::type()'],['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html#aca9bb93efe43106321e4afe0b67542a3',1,'cutlass::platform::remove_volatile< volatile T >::type()'],['../structcutlass_1_1platform_1_1remove__cv.html#a19e5b12cf4eb15ce13d6306735b6de08',1,'cutlass::platform::remove_cv::type()'],['../structcutlass_1_1platform_1_1aligned__storage.html#a9cf0360f335bcd1e9d9e1b266b6dd6c1',1,'cutlass::platform::aligned_storage::type()']]] +]; diff --git a/docs/generated-html/search/all_13.html b/docs/generated-html/search/all_13.html new file mode 100644 index 0000000000..04f66e2fe9 --- /dev/null +++ b/docs/generated-html/search/all_13.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_13.js b/docs/generated-html/search/all_13.js new file mode 100644 index 0000000000..e175495c82 --- /dev/null +++ b/docs/generated-html/search/all_13.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html',1,'cutlass::platform::unique_ptr< T, Deleter >'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa8a370bc7e4c2d99eb85e7fea27b3179',1,'cutlass::platform::unique_ptr::unique_ptr()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a14c8bf5a5deefe4a6602ccd5c5af364c',1,'cutlass::platform::unique_ptr::unique_ptr(pointer p)']]] +]; diff --git a/docs/generated-html/search/all_14.html b/docs/generated-html/search/all_14.html new file mode 100644 index 0000000000..285f34bd21 --- /dev/null +++ b/docs/generated-html/search/all_14.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_14.js b/docs/generated-html/search/all_14.js new file mode 100644 index 0000000000..267126dcc1 --- /dev/null +++ b/docs/generated-html/search/all_14.js @@ -0,0 +1,15 @@ +var searchData= +[ + ['val',['val',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#abc729cc51d5c90b1d7b0df3092d47cd4',1,'cutlass::platform::alignment_of::pad']]], + ['valid',['valid',['../structcutlass_1_1FragmentIterator.html#ab18f8ea676b45831f939715212167a99',1,'cutlass::FragmentIterator::valid()'],['../structcutlass_1_1FragmentConstIterator.html#a01571b2fc566793fd50a10fa82441951',1,'cutlass::FragmentConstIterator::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ac4d2c293f9312b673ea29bf79b2882fd',1,'cutlass::gemm::GemmGlobalIteratorAb::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6594acc213fc8d4289c6c73631f60120',1,'cutlass::gemm::GemmGlobalIteratorCd::valid()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a468f8f503777e4a2b0089ee2bd6c471a',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::valid()'],['../structcutlass_1_1TileIteratorBase.html#af78a2bf3e7507dc7f50343a3c209f770',1,'cutlass::TileIteratorBase::valid()']]], + ['value',['value',['../structcutlass_1_1log2__down.html#a793565cd891559fab765455e847171dca23d1b50f2f02e1026d4b5dc7ebd6880d',1,'cutlass::log2_down::value()'],['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html#ad7d3c2329ab708bd4af36ffaee8509cba282c4c5d8f66dc49544f34071f148b1f',1,'cutlass::log2_down< N, 1, Count >::value()'],['../structcutlass_1_1log2__up.html#a5826002505544547d0c5cc311c2338e3a09591054a7c9b184769d579c56dd09d6',1,'cutlass::log2_up::value()'],['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html#ab001737f02df0a2c514334a1bfa6f1f9a6b6af5b6bf14ee5d3e3f1442e7f75117',1,'cutlass::log2_up< N, 1, Count 
>::value()'],['../structcutlass_1_1sqrt__est.html#abe44577e3d8f34fc07bb9ecf89b25b11a2e73d046302be2504f50c08d788e9964',1,'cutlass::sqrt_est::value()'],['../structcutlass_1_1divide__assert.html#a20e8b8a803c6b5cfe636724760442e33ab924a64662c2eb917b1dd4ca31fdd2dc',1,'cutlass::divide_assert::value()'],['../structcutlass_1_1platform_1_1integral__constant.html#a9bbaca83ae76941edb9b75b2741d3ad9',1,'cutlass::platform::integral_constant::value()'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac7e3ab73057682cc2eb6ed74c33e5eff',1,'cutlass::platform::is_base_of_helper::value()'],['../structcutlass_1_1platform_1_1alignment__of.html#aa1d40937d3536b68e90c580765821389aa36284864bc3d1f73d3bf73cd8da7c83',1,'cutlass::platform::alignment_of::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html#a6005c446eb41749276e0114b82abd990a5b0129d0f9bb45f1c56506efbbb22b6f',1,'cutlass::platform::alignment_of< int4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html#ac55e0c5a0bc4c95981744e55ee7580cea807729922944eede573430b20ad4b322',1,'cutlass::platform::alignment_of< uint4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html#ac9e709c32271b14b35c9607c64835a95a6a6ee3f24f4d123fc7c138fe5b776f2e',1,'cutlass::platform::alignment_of< float4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html#ad58512f76f0b9b000d48f1ff869a0547a3d020dd8ba5c735a60d7c2c897e158f5',1,'cutlass::platform::alignment_of< long4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html#adc0eec628649de183fe984bb46898830a8152a79c27d055dc3d0b8d662c0bc96a',1,'cutlass::platform::alignment_of< ulong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html#aadf6522691db02f1aab22c22716f0793a940fa73dc4f0a49b78e4e0cefaf4775d',1,'cutlass::platform::alignment_of< longlong2 
>::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html#a511f088278b3de04feb55ab60bdc5a09a58b5cc7be52956c43c2966af5887db80',1,'cutlass::platform::alignment_of< ulonglong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html#a5fb114d264023728cca5364401bd6929a7b89d57c8009e094f69ff57e196d8318',1,'cutlass::platform::alignment_of< double2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html#a666c4fd30155873e3499f5cdc11782daafc1a7c2bb5e6483d42d380a2b4fd9561',1,'cutlass::platform::alignment_of< longlong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html#a2568c1ab218cab6505bd20e3c2c420ffa54f6e1afec0ed30b18ab79fd6faf81b5',1,'cutlass::platform::alignment_of< ulonglong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html#a024eaf40a8f3e8bd38b416868e0c68bca5a60b16666306472e92ad1320473ba85',1,'cutlass::platform::alignment_of< double4 >::value()']]], + ['value_5ftype',['value_type',['../structcutlass_1_1platform_1_1integral__constant.html#ab2ed0b3506818139f1f96639742e79fd',1,'cutlass::platform::integral_constant']]], + ['vector',['Vector',['../unioncutlass_1_1Vector.html',1,'cutlass::Vector< Scalar_, kLanes_ >'],['../structcutlass_1_1VectorTraits.html#a4ac6196c07e0d3ba8a03cd72a05026a2',1,'cutlass::VectorTraits::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a12b9084c48d2d829730f907485dfb5e5',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#aff21f15596731eacf8c587811bb4ccdb',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Vector()']]], + ['vector_2eh',['vector.h',['../vector_8h.html',1,'']]], + ['vector_3c_20half_2c_20klanes_5f_20_3e',['Vector< half, kLanes_ >',['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html',1,'cutlass']]], + 
['vectorize',['Vectorize',['../structcutlass_1_1Vectorize.html',1,'cutlass']]], + ['vectorize_3c_20element_5f_2c_201_20_3e',['Vectorize< Element_, 1 >',['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html',1,'cutlass']]], + ['vectortraits',['VectorTraits',['../structcutlass_1_1VectorTraits.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['VectorTraits< Vector< T, Lanes > >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['VectorTraits< Vector< T, Lanes > const >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_15.html b/docs/generated-html/search/all_15.html new file mode 100644 index 0000000000..0ed74e0122 --- /dev/null +++ b/docs/generated-html/search/all_15.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_15.js b/docs/generated-html/search/all_15.js new file mode 100644 index 0000000000..ddd79cabe1 --- /dev/null +++ b/docs/generated-html/search/all_15.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['warps',['Warps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaff4a5e0f9e4256f184a22cad0ce8cf4',1,'cutlass::gemm::GemmSharedLoadTileATraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7ad7a4e33ed43926e165e66162eb620b',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af4597927405d8bb1ad2c464fad064703',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4764f70691cb3fee91ce47653363aa4f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#abb6ba58a2f2d80db0b2c9c1d88454efd',1,'cutlass::gemm::GemmConfig::Warps()']]], + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]], + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ 
>'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a505f124fa3f47c6d57b7275e81be6dd3',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa5c14e2a799249fe8bba14aa1dbe69dc',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['wmmagemmglobaliteratorcdtraits',['WmmaGemmGlobalIteratorCdTraits',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/all_16.html b/docs/generated-html/search/all_16.html new file mode 100644 index 0000000000..696f02520a --- /dev/null +++ b/docs/generated-html/search/all_16.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_16.js b/docs/generated-html/search/all_16.js new file mode 100644 index 0000000000..d8526488a7 --- /dev/null +++ b/docs/generated-html/search/all_16.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['yes',['yes',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac1cf3f804e7686213fd42c678cc6d669',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/all_17.html b/docs/generated-html/search/all_17.html new file mode 100644 index 0000000000..f1e14b6352 --- /dev/null +++ b/docs/generated-html/search/all_17.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_17.js b/docs/generated-html/search/all_17.js new file mode 100644 index 0000000000..10f55890b8 --- /dev/null +++ b/docs/generated-html/search/all_17.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7eunique_5fptr',['~unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#a8902399dac4ab64f08f909f2ad9d4bcf',1,'cutlass::platform::unique_ptr']]] +]; diff --git a/docs/generated-html/search/all_2.html b/docs/generated-html/search/all_2.html new file mode 100644 index 0000000000..2f17735ef0 --- /dev/null +++ b/docs/generated-html/search/all_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_2.js b/docs/generated-html/search/all_2.js new file mode 100644 index 0000000000..b440de047e --- /dev/null +++ b/docs/generated-html/search/all_2.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['base',['Base',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a507f825824e624d80a34ea9395934160',1,'cutlass::gemm::GlobalLoadStream::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a581b7cdeef3e620f246923fa07f9db5a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ae13e0d30a941e16875f196b4844b03ed',1,'cutlass::gemm::GemmGlobalIteratorAb::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a8f8fbb65070589769468c6b1ac6ba7a5',1,'cutlass::gemm::GemmGlobalIteratorCd::Base()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ac0c372c24c4c5340153b11edab874741',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7ec19bf90207a7f598f2ec5166649495',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aca63ec1099444c555299dc144282dded',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a4b23ba8c14e26672a516aa43063250c2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#a07f9a934f04610db41aa1aac2f4cdf04',1,'cutlass::gemm::IgemmEpilogue::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a98b415dbe6f7b6cb0c41a4e6b3ad5abf',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true 
>::Base()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#ab19f72d239f639f261fbb63f72f10acf',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#affd04d88a0bbef13c54f10000a5dc15d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aef7047c6a0d0c3db0bfb6bec08520aad',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#a194aa2762885c3d556a84ff410200b86',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a48a8eda430139e6a131654a54bbf0f3b',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Base()'],['../classcutlass_1_1TensorView.html#a27f09c55f879410cceb75eb25fe542d4',1,'cutlass::TensorView::Base()'],['../structcutlass_1_1TileLoadIterator.html#a1bc1bd4893c14b313ee71b71db2903f3',1,'cutlass::TileLoadIterator::Base()'],['../structcutlass_1_1TileStoreIterator.html#af4576dca736bab8ac73b308522cb4a67',1,'cutlass::TileStoreIterator::Base()']]], + ['baseparams',['BaseParams',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a09268125f1e323874f6c12b50185c517',1,'cutlass::gemm::GemmGlobalIteratorAb::BaseParams()'],['../structcutlass_1_1TileLoadIterator.html#a788bab4fa46dc26854348b751cf1cc76',1,'cutlass::TileLoadIterator::BaseParams()'],['../structcutlass_1_1TileStoreIterator.html#a5484b46ac2646edb7a185b51137f70c0',1,'cutlass::TileStoreIterator::BaseParams()']]], + ['begin',['begin',['../structcutlass_1_1PredicateVector.html#a649045d8224514a4c28bcaf4b247b4a5',1,'cutlass::PredicateVector']]], + 
['beta',['beta',['../structcutlass_1_1gemm_1_1GemmDesc.html#ab91b702a9932144b388fad3159130332',1,'cutlass::gemm::GemmDesc::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a0e455ad2e4eba67259867f9123ca817b',1,'cutlass::gemm::LinearScaling::Params::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a8af4e58c4988838f2dd0a2172c47e12e',1,'cutlass::gemm::LinearScaling::beta()']]], + ['blockswizzle',['BlockSwizzle',['../structcutlass_1_1gemm_1_1GemmTraits.html#a50672b5fa67d858aeff8f254cf28e941',1,'cutlass::gemm::GemmTraits']]], + ['bool_5fconstant',['bool_constant',['../structcutlass_1_1platform_1_1bool__constant.html',1,'cutlass::platform']]], + ['byte',['byte',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#a86f075f91b80918e968951713430f0b4',1,'cutlass::platform::alignment_of::pad']]] +]; diff --git a/docs/generated-html/search/all_3.html b/docs/generated-html/search/all_3.html new file mode 100644 index 0000000000..a3e6f7dbbe --- /dev/null +++ b/docs/generated-html/search/all_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_3.js b/docs/generated-html/search/all_3.js new file mode 100644 index 0000000000..fa720a8db8 --- /dev/null +++ b/docs/generated-html/search/all_3.js @@ -0,0 +1,59 @@ +var searchData= +[ + ['check',['check',['../structcutlass_1_1platform_1_1is__base__of__helper.html#a5bf08859497e304ca353699ad6ac332b',1,'cutlass::platform::is_base_of_helper::check(DerivedT *, T)'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae8896817cabf297437b3a073e693ffd2',1,'cutlass::platform::is_base_of_helper::check(BaseT *, int)']]], + ['clamp',['clamp',['../structcutlass_1_1Coord.html#a482ada6da62f427987c22098796fcf7e',1,'cutlass::Coord']]], + ['clear',['clear',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a5513254af1f9979b6d0b9f236c3e7325',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::clear()'],['../structcutlass_1_1Fragment.html#a29e7408fcde8cdf9de5e3a10eaa46391',1,'cutlass::Fragment::clear()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#adb8026a19b09e9a581ec767c2c2da4ab',1,'cutlass::gemm::ClearAccumulators::clear()']]], + ['clear_5faccumulators_2eh',['clear_accumulators.h',['../clear__accumulators_8h.html',1,'']]], + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae1cf7988c9cff79a2c3252aaf91fc165',1,'cutlass::gemm::GemmTraits::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#aba2366bec386c74df47dfd0426b07041',1,'cutlass::gemm::HgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5645e18de29a84c9a9b3f3105966f0c5',1,'cutlass::gemm::IgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#a4ba07ea6d6fef961de1cb95b13c672ef',1,'cutlass::gemm::ClearAccumulators::ClearAccumulators()']]], + 
['commit',['commit',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6ce2c6e81d159d8e9ab736cb263f44ae',1,'cutlass::gemm::GlobalLoadStreamBase::commit()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9cc435369c7fc76d0bb6233a8258e257',1,'cutlass::gemm::SharedLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a6dc512be014b9d849057e2fd4c0b0485',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#ade2d85507dec77591e66276339a1eef5',1,'cutlass::gemm::GemmTraits::SharedLoadStream::commit()']]], + ['computeoffsetfromshape',['ComputeOffsetFromShape',['../structcutlass_1_1ComputeOffsetFromShape.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_20ksc_5f_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides',['ComputeOffsetFromStrides',['../structcutlass_1_1ComputeOffsetFromStrides.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html',1,'cutlass']]], + 
['computethreadoffsetfromstrides',['ComputeThreadOffsetFromStrides',['../structcutlass_1_1ComputeThreadOffsetFromStrides.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_201_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_20t_5fc_5f_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html',1,'cutlass']]], + ['conditional',['conditional',['../structcutlass_1_1platform_1_1conditional.html',1,'cutlass::platform']]], + ['conditional_3c_20false_2c_20t_2c_20f_20_3e',['conditional< false, T, F >',['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html',1,'cutlass::platform']]], + ['congruous',['Congruous',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html#abe4eb7f9a0ed7d48a81029e88849dcf2',1,'cutlass::gemm::GemmOperandTraitsAb']]], + ['const_5fbegin',['const_begin',['../structcutlass_1_1PredicateVector.html#aeb7f9226a4fa49d06500c3c83958dc41',1,'cutlass::PredicateVector']]], + ['const_5fend',['const_end',['../structcutlass_1_1PredicateVector.html#ab931610bc07ee0e87bb4d9a4d53a2321',1,'cutlass::PredicateVector']]], + ['const_5fref',['const_ref',['../classcutlass_1_1TensorView.html#a23564f1d333bb16343ed3a885f894285',1,'cutlass::TensorView']]], + ['constexpr',['constexpr',['../platform_8h.html#a72f0657181cca64b44eb186b707eb380',1,'platform.h']]], + 
['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::ConstIterator'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a1216aab9c567ec0d4232019008ef3ea7',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(ConstIterator const &it)'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a590e4f4533c87162c0b79e8d876a8fda',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(PredicateVector const &_vec, int _start=0)']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html',1,'cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9abd78d5c3e444bfb23d2b1a08be2be1',1,'cutlass::ConstPredicateTileAdapter::ConstPredicateTileAdapter()']]], + ['consttensorref_5ft',['ConstTensorRef_t',['../classcutlass_1_1TensorView.html#a8ef76170bc5ba832dc01339133021830',1,'cutlass::TensorView']]], + ['contains',['contains',['../classcutlass_1_1TensorView.html#aa94063d9a9c6e599d3f53e22433274be',1,'cutlass::TensorView']]], + ['convert',['Convert',['../structcutlass_1_1Convert.html',1,'cutlass::Convert< InputFragment_, OutputFragment_ >'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a593a5a2c48708965e829d242ccb3b99f',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert()'],['../classcutlass_1_1TensorRef.html#a7eb4444e2b3fce5a5ccde65a75df633c',1,'cutlass::TensorRef::convert()']]], + ['convert_2eh',['convert.h',['../convert_8h.html',1,'']]], + ['convert_3c_20fragment_3c_20inputscalar_5f_2c_20kscalars_5f_20_3e_2c_20fragment_3c_20outputscalar_5f_2c_20kscalars_5f_20_3e_20_3e',['Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > 
>',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html',1,'cutlass']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html',1,'cutlass::Coord< N_ >'],['../structcutlass_1_1Coord.html#a9cbfff91f0b0d0a149534c97e3d6e69b',1,'cutlass::Coord::Coord(int value=0)'],['../structcutlass_1_1Coord.html#a53a3d88a884f6cb7fda8aedfe2cec2c5',1,'cutlass::Coord::Coord(int _idx[])']]], + ['coord_2eh',['coord.h',['../coord_8h.html',1,'']]], + ['coord_3c_204_20_3e',['Coord< 4 >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_3c_20rank_20_3e',['Coord< Rank >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_5ft',['Coord_t',['../classcutlass_1_1TensorView.html#a4037baf5069138ec3967810d2e185017',1,'cutlass::TensorView']]], + ['copy',['Copy',['../structcutlass_1_1Copy.html',1,'cutlass::Copy< Fragment_ >'],['../structcutlass_1_1Copy.html#ab2c20f886208396a1779c6d29b56c3f1',1,'cutlass::Copy::Copy()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#af7a15b4456cda01c1ffbb2fdc532e87e',1,'cutlass::gemm::GlobalLoadStreamBase::copy()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a7f6bf3b8d70bcd74d84519decd9f0d8e',1,'cutlass::gemm::SharedLoadStream::copy(FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a279144e9722055d4b862e3fa25948762',1,'cutlass::gemm::SharedLoadStream::copy(int d, FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ae033f55779b45b4228f40a4d699062bb',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::copy()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#af25495bb0bb35bd64246d3a80fe4806f',1,'cutlass::gemm::GemmTraits::SharedLoadStream::copy()']]], + ['core_5fio_2eh',['core_io.h',['../core__io_8h.html',1,'']]], + ['count',['count',['../structcutlass_1_1Coord.html#a40429a9154f7a142ad7e9eb35282d196',1,'cutlass::Coord']]], + 
['cuda_5flog',['CUDA_LOG',['../debug_8h.html#a27e3466bcf1ec7fda4f6f95aa0a51177',1,'debug.h']]], + ['cuda_5flog_5fdebug',['CUDA_LOG_DEBUG',['../debug_8h.html#a8d6986db819719ada8b29d53dfc104a6',1,'debug.h']]], + ['cuda_5fperror',['CUDA_PERROR',['../debug_8h.html#aed8337b88d71895f95f8980ef0b3a50b',1,'debug.h']]], + ['cuda_5fperror_5fdebug',['CUDA_PERROR_DEBUG',['../debug_8h.html#a36436f5408940a47ac5cdfc9b31648db',1,'debug.h']]], + ['cuda_5fperror_5fexit',['CUDA_PERROR_EXIT',['../debug_8h.html#a002632ff687c83cff0484476be401f05',1,'debug.h']]], + ['cuda_5fperror_5fimpl',['cuda_perror_impl',['../namespacecutlass.html#a6d3dfeb642a2ce3d5f52243fe48f89cc',1,'cutlass']]], + ['cutlass',['cutlass',['../namespacecutlass.html',1,'']]], + ['cutlass_2eh',['cutlass.h',['../cutlass_8h.html',1,'']]], + ['cutlass_5fassert',['CUTLASS_ASSERT',['../cutlass_8h.html#a0159b8e4cd578881a1ccfd0921516af7',1,'cutlass.h']]], + ['cutlass_5fhost_5fdevice',['CUTLASS_HOST_DEVICE',['../cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1',1,'cutlass.h']]], + ['cutlass_5fmajor',['CUTLASS_MAJOR',['../cutlass_8h.html#a8ff3cda9323810c1c504793a0206d4b8',1,'cutlass.h']]], + ['cutlass_5fmath_2eh',['cutlass_math.h',['../cutlass__math_8h.html',1,'']]], + ['cutlass_5fminor',['CUTLASS_MINOR',['../cutlass_8h.html#ad114a1ab01f73833ea00020ffb7bcea7',1,'cutlass.h']]], + ['cutlass_5fpatch',['CUTLASS_PATCH',['../cutlass_8h.html#a1d4e5818a594bbfc472e54978955cb8b',1,'cutlass.h']]], + ['cutlass_5fpragma_5fno_5funroll',['CUTLASS_PRAGMA_NO_UNROLL',['../cutlass_8h.html#adb3bc73d74b4a4bf13099d5696db3352',1,'cutlass.h']]], + ['cutlass_5fpragma_5funroll',['CUTLASS_PRAGMA_UNROLL',['../cutlass_8h.html#a4b1c9f25ab6eaa25e1f2258dd63e6ce4',1,'cutlass.h']]], + ['cutlass_5fversion',['CUTLASS_VERSION',['../cutlass_8h.html#aa3040eddf073214969f9445bfa925039',1,'cutlass.h']]], + ['gemm',['gemm',['../namespacecutlass_1_1gemm.html',1,'cutlass']]], + ['platform',['platform',['../namespacecutlass_1_1platform.html',1,'cutlass']]] +]; diff --git 
a/docs/generated-html/search/all_4.html b/docs/generated-html/search/all_4.html new file mode 100644 index 0000000000..6452295dce --- /dev/null +++ b/docs/generated-html/search/all_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_4.js b/docs/generated-html/search/all_4.js new file mode 100644 index 0000000000..c58b1aa483 --- /dev/null +++ b/docs/generated-html/search/all_4.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['d_5fa',['d_a',['../structcutlass_1_1gemm_1_1GemmDesc.html#aae63781de41962f496da469684919447',1,'cutlass::gemm::GemmDesc']]], + ['d_5fb',['d_b',['../structcutlass_1_1gemm_1_1GemmDesc.html#a05915032eba39bc9b085bec5ff17257b',1,'cutlass::gemm::GemmDesc']]], + ['d_5fc',['d_c',['../structcutlass_1_1gemm_1_1GemmDesc.html#aa2b3126c082d04fd31521cb0e84cf4d5',1,'cutlass::gemm::GemmDesc']]], + ['d_5fd',['d_d',['../structcutlass_1_1gemm_1_1GemmDesc.html#a30326e2d81c8e154d749f35837903216',1,'cutlass::gemm::GemmDesc']]], + ['data',['data',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3af66b82b1a0cc5bf6141f940553e048',1,'cutlass::gemm::GemmGlobalIteratorAb::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a0d3c1a58f23957f9850d1b22992a981a',1,'cutlass::gemm::GemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6fd4e62eb280a5b8c17eb79141414581',1,'cutlass::gemm::GemmGlobalIteratorCd::data() const'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#afe77778a126449e210c0bd6ec2dc6709',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a90e9886534ecbbce69f57b4030d0903f',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data() const'],['../classcutlass_1_1TensorRef.html#a8e23c78658f45c6f197a1774cc85c5b7',1,'cutlass::TensorRef::data()'],['../classcutlass_1_1TensorView.html#a248e4240ccf96c976254464710a73fc8',1,'cutlass::TensorView::data()'],['../structcutlass_1_1TileLoadIterator.html#afb6320b600f1f561594a9fb543b954e4',1,'cutlass::TileLoadIterator::data()'],['../structcutlass_1_1TileStoreIterator.html#a5ebab59862d5f50ad980871515d999b0',1,'cutlass::TileStoreIterator::data()']]], + 
['debug_2eh',['debug.h',['../debug_8h.html',1,'']]], + ['default_5fdelete',['default_delete',['../structcutlass_1_1platform_1_1default__delete.html',1,'cutlass::platform']]], + ['default_5fdelete_3c_20t_5b_5d_3e',['default_delete< T[]>',['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html',1,'cutlass::platform']]], + ['deleter_5ftype',['deleter_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a85cab9945c36dc56bd7d6adf30c0d252',1,'cutlass::platform::unique_ptr']]], + ['delta',['Delta',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af1f105d4712f01880b0944666e2f81ae',1,'cutlass::gemm::GemmEpilogueTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aede069e51e0732a9648c437261bd4d66',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a07bb48f99000256f04f00564a4371c2f',1,'cutlass::gemm::GemmGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#aba61fb6e93a6423ab72c082c280f5db4',1,'cutlass::gemm::GemmGlobalTileCdTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a645f65f7d8f123936b286521df470224',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#afd691b764b7d105a1ed41dada6049e71',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a2ee87510d2deccf8b9633aaa4f6340ea',1,'cutlass::gemm::GemmSharedLoadTileATraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ad029d098ba13543bf99c728e6b93006d',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a5587ef22f419ab9a7c6117917cc99c57',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#ac5578da2577cddd5a38cb628f894f644',1,'cutlass::gemm::GemmSharedLoadTileDTraits
::Delta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a8f8de5a6811b77f0c721cd78a237223e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aed055504ec5f09657e059416150188a9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a08dada072eefded4c859df4e5fc25ca6',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#ab55665f7c2f2cb8b8b9b8ac852d48002',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Delta()'],['../structcutlass_1_1TileTraits.html#af88f5cea9f452d83004ea0fa0f9d56eb',1,'cutlass::TileTraits::Delta()'],['../structcutlass_1_1TileIteratorBase.html#a9bc6c04f4a3adeb5a29743fa43425088',1,'cutlass::TileIteratorBase::Delta()'],['../structcutlass_1_1TileLoadIterator.html#ac2a7f94723259f0d3c7b8a6d5b8778bf',1,'cutlass::TileLoadIterator::Delta()'],['../structcutlass_1_1TileStoreIterator.html#a1c433ba0eea5e6a46f36101d8de98ed0',1,'cutlass::TileStoreIterator::Delta()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a47404b4527b101e286347714aea687d5',1,'cutlass::TileTraitsStrideMajor::Delta()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#ab1a4945bf562debeee1af813288e5896',1,'cutlass::TileTraitsContiguousMajor::Delta()'],['../structcutlass_1_1TileTraitsWarpRake.html#a3ce218b223c5716af40c316899324bbe',1,'cutlass::TileTraitsWarpRake::Delta()']]], + ['dgemm_5ftraits_2eh',['dgemm_traits.h',['../dgemm__traits_8h.html',1,'']]], + ['dgemmconfig',['DgemmConfig',['../structcutlass_1_1gemm_1_1DgemmConfig.html',1,'cutlass::gemm']]], + ['dgemmtraits',['DgemmTraits',['../structcutlass_1_1gemm_1_1DgemmTraits.html',1,'cutlass::gemm']]], + ['divide_5fassert',['divide_assert',['../structcutlass_1_1divide__assert.html',1,'cutlass']]], + ['dot',['dot',['../structcutlass_1_1Coord.html#ad4b3704d14057c043f972827671115cf',1,'cutlass::Coord::dot(Coord 
const &b, T sum) const'],['../structcutlass_1_1Coord.html#ae023c0c664c22a978e9b9ce5e063aae4',1,'cutlass::Coord::dot(Coord const &b) const']]], + ['dummy',['dummy',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/all_5.html b/docs/generated-html/search/all_5.html new file mode 100644 index 0000000000..e59e1d5363 --- /dev/null +++ b/docs/generated-html/search/all_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_5.js b/docs/generated-html/search/all_5.js new file mode 100644 index 0000000000..066d4cd38c --- /dev/null +++ b/docs/generated-html/search/all_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['element',['Element',['../structcutlass_1_1Fragment.html#a9c67fa5bbd0b8b49bd6ec002dee3cbab',1,'cutlass::Fragment::Element()'],['../structcutlass_1_1FragmentIterator.html#ab4ef3c5a6b5e13224e45bbbcb9f1bc5d',1,'cutlass::FragmentIterator::Element()'],['../structcutlass_1_1FragmentConstIterator.html#ae98ab2a88342e7dbf9631cfb5cf5e706',1,'cutlass::FragmentConstIterator::Element()']]], + ['element_5ftype',['element_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a94cea0ebf2ac4bec69dfa1f80ea07d50',1,'cutlass::platform::unique_ptr']]], + ['enable_5fif',['enable_if',['../structcutlass_1_1platform_1_1enable__if.html',1,'cutlass::platform']]], + ['enable_5fif_3c_20false_2c_20t_20_3e',['enable_if< false, T >',['../structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html',1,'cutlass::platform']]], + ['end',['end',['../structcutlass_1_1PredicateVector.html#ad9493fc80fdc33330cc15641779cc275',1,'cutlass::PredicateVector']]], + 
['epilogue',['Epilogue',['../structcutlass_1_1gemm_1_1GemmTraits.html#a424f1ac14e1e7ad37428edd0cf13e7fe',1,'cutlass::gemm::GemmTraits::Epilogue()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a234ae6065d5ab56135e10119d3ad2d98',1,'cutlass::gemm::HgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5e2ed697a9091a1ca8b19855b5a2c651',1,'cutlass::gemm::IgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a073430a1e8b124aec8a1f1e00f262bc8',1,'cutlass::gemm::GemmTraits::Params::epilogue()'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#afdca9ac1d28e17efaa394f5831a60c04',1,'cutlass::gemm::GemmTraits::SharedStorage::epilogue()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae1983e37454ed14272b23b964614c54c',1,'cutlass::gemm::GemmEpilogue::epilogue()']]], + ['epilogue_5fwith_5for_5fwithout_5fbeta',['epilogue_with_or_without_beta',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0c24dce365565f75e7edc1de1cb50ea4',1,'cutlass::gemm::GemmEpilogue']]], + ['evaluate',['evaluate',['../structcutlass_1_1gemm_1_1LinearScaling.html#a2e0d140aed388d2457dfb24d28fcd08a',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ &output)'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a47a53e5b67b2207fb3ba38a8b9cef448',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)']]], + ['extent',['Extent',['../structcutlass_1_1Extent.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['Extent< Vector< T, Lanes > >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['Extent< Vector< T, Lanes > const >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_6.html b/docs/generated-html/search/all_6.html new file mode 100644 
index 0000000000..f75a754e92 --- /dev/null +++ b/docs/generated-html/search/all_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_6.js b/docs/generated-html/search/all_6.js new file mode 100644 index 0000000000..0734def498 --- /dev/null +++ b/docs/generated-html/search/all_6.js @@ -0,0 +1,32 @@ +var searchData= +[ + ['false_5ftype',['false_type',['../namespacecutlass_1_1platform.html#ad8c95b2109070847b13d355120344380',1,'cutlass::platform']]], + ['fetched_5fa',['fetched_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a3147da380e4c1e465aba0b965ac87ab5',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5fb',['fetched_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a837fbec1d47ae45480941de6290889c0',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5ffragment',['fetched_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a26aa580a2697ad02c27f868e7779348d',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['fetchedfragment',['FetchedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0a7f6ae85cfb162b1facf24dff8bab36',1,'cutlass::gemm::GlobalLoadStreamBase::FetchedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a41b45085f17532a6394de3f5ccf201e7',1,'cutlass::gemm::SharedLoadStream::FetchedFragment()']]], + ['fill',['fill',['../structcutlass_1_1PredicateVector.html#a236bd1a822479750a809452fd58dd917',1,'cutlass::PredicateVector']]], + ['fragment',['Fragment',['../structcutlass_1_1Fragment.html',1,'cutlass::Fragment< Element_, kElements_, kAlignment_ 
>'],['../structcutlass_1_1FragmentIterator.html#afd15cbe1c9a0fd7871b12f3f3042c808',1,'cutlass::FragmentIterator::Fragment()'],['../structcutlass_1_1FragmentConstIterator.html#acac5b62b365f36f370adb0fee11cea05',1,'cutlass::FragmentConstIterator::Fragment()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a32687e2aa49dfa251eab14d5cd2036be',1,'cutlass::gemm::GlobalLoadStreamBase::Fragment()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2180cfbb482d300472ad2993e4b555d4',1,'cutlass::gemm::GemmGlobalIteratorAb::Fragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9f025ed2609bf33230f6a390c22b11b7',1,'cutlass::gemm::SharedLoadStream::Fragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a82dc6d9a10de7aba9a69e6025b2cc2b7',1,'cutlass::gemm::HgemmSwizzle::Fragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a67693ee79f93cb61fc37f2e632eaea8d',1,'cutlass::gemm::IgemmSwizzle::Fragment()'],['../structcutlass_1_1TileIteratorBase.html#a0d7b595d7959cc1680fc07c2e02e1c8e',1,'cutlass::TileIteratorBase::Fragment()'],['../structcutlass_1_1TileLoadIterator.html#aaf72c4897641080b1d84c0bbd8d813cc',1,'cutlass::TileLoadIterator::Fragment()'],['../structcutlass_1_1TileStoreIterator.html#a95da23108b74ad085024ab45e84083e1',1,'cutlass::TileStoreIterator::Fragment()']]], + ['fragment_2eh',['fragment.h',['../fragment_8h.html',1,'']]], + ['fragment_5fa',['fragment_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a4a8c64d85aa012e3689dd024c486924b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_5fb',['fragment_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#aa28f34fb0c4bf739246d92c2fef80e0b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_20concept',['Fragment Concept',['../group__fragment__concept.html',1,'']]], + ['fragment_20iterator_20concept',['Fragment Iterator Concept',['../group__fragment__iterator__concept.html',1,'']]], + 
['fragment_5fload_5fstore_2eh',['fragment_load_store.h',['../fragment__load__store_8h.html',1,'']]], + ['fragment_5fmultiply_5fadd_2eh',['fragment_multiply_add.h',['../fragment__multiply__add_8h.html',1,'']]], + ['fragmenta',['FragmentA',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a1daf96b6d152c5cf32f248bbfd605b74',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a71aadbb130d4b1a6532c45282b37354f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a69d387d932b628dc51c18fcc178c4914',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentA()']]], + ['fragmentb',['FragmentB',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ae79e7fc5be2f4c8d30ca83edc151f63a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a43e278686b493d0aef943f32a9f47b9e',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5429a730a1dea00dc4aecbe8e3ef1620',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentB()']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html',1,'cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ 
>'],['../structcutlass_1_1TileIteratorBase.html#a25a241bbdc0b0121992019a16f1a6d60',1,'cutlass::TileIteratorBase::FragmentConstIterator()'],['../structcutlass_1_1TileLoadIterator.html#a4c7a3a4917245de8269b74bdabe16b76',1,'cutlass::TileLoadIterator::FragmentConstIterator()'],['../structcutlass_1_1TileStoreIterator.html#a48de0db7ee2ee9699b946a9d5a0364c7',1,'cutlass::TileStoreIterator::FragmentConstIterator()'],['../structcutlass_1_1FragmentConstIterator.html#ac4b6f351e6e72bed37e425f02a10c81e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(OtherFragment_ &fragment, int offset=0)'],['../structcutlass_1_1FragmentConstIterator.html#a3a8fd8f13c157ed13dc93fd78036c59e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)']]], + ['fragmentelement',['FragmentElement',['../structcutlass_1_1TileIteratorBase.html#ac7cca14d54bf3f0749db1ffaea7c9ae7',1,'cutlass::TileIteratorBase::FragmentElement()'],['../structcutlass_1_1TileLoadIterator.html#a2edd89863b8035137ccd8dd3ad7be464',1,'cutlass::TileLoadIterator::FragmentElement()'],['../structcutlass_1_1TileStoreIterator.html#a2b13136a970fae187fcb377c9be28fac',1,'cutlass::TileStoreIterator::FragmentElement()']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html',1,'cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >'],['../structcutlass_1_1TileIteratorBase.html#a379a52ed1128fc9f93cad35d3e3233e5',1,'cutlass::TileIteratorBase::FragmentIterator()'],['../structcutlass_1_1TileLoadIterator.html#aebbe5a0996dcd362caad618e78dc2591',1,'cutlass::TileLoadIterator::FragmentIterator()'],['../structcutlass_1_1TileStoreIterator.html#a0843b2d82422e7178f324a8d3be9d705',1,'cutlass::TileStoreIterator::FragmentIterator()'],['../structcutlass_1_1FragmentIterator.html#ae1825fe3e138e2aa62d27dab2b5227b4',1,'cutlass::FragmentIterator::FragmentIterator()']]], + 
['fragmentload',['FragmentLoad',['../structcutlass_1_1FragmentLoad.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html',1,'cutlass']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html',1,'cutlass::gemm::FragmentMultiplyAdd< Scalar_ >'],['../structcutlass_1_1gemm_1_1LinearScaling.html#aa697d4eaced1ef08247aeb1fcc0f0ea8',1,'cutlass::gemm::LinearScaling::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af19e14a22aefd1124f7d31beec6f8c42',1,'cutlass::gemm::FragmentMultiplyAdd::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a21f0965f6178917c7f5c6d79ed048059',1,'cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd()']]], + ['fragmentmultiplyadd_3c_20half_20_3e',['FragmentMultiplyAdd< half >',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html',1,'cutlass::gemm']]], + 
['fragmentshape',['FragmentShape',['../structcutlass_1_1FragmentIterator.html#a63ff1767c4923b0a2b6b64487306ed76',1,'cutlass::FragmentIterator::FragmentShape()'],['../structcutlass_1_1FragmentConstIterator.html#a880f12d0cd42cdae7ce6009d2233f577',1,'cutlass::FragmentConstIterator::FragmentShape()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#afe44fedcf24b90c0cf6ac7d1495b89e4',1,'cutlass::gemm::HgemmSwizzle::FragmentShape()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a13a3b052cd8b714471489a9cc4dc7004',1,'cutlass::gemm::IgemmSwizzle::FragmentShape()'],['../structcutlass_1_1TileIteratorBase.html#a14f4b356c9cd320e6e7b451edbf58c24',1,'cutlass::TileIteratorBase::FragmentShape()'],['../structcutlass_1_1TileLoadIterator.html#a7c27a7b0d8593b002eca186c15fdc869',1,'cutlass::TileLoadIterator::FragmentShape()'],['../structcutlass_1_1TileStoreIterator.html#a3b872e85844c9e009fa480a71a829136',1,'cutlass::TileStoreIterator::FragmentShape()']]], + ['fragmentstore',['FragmentStore',['../structcutlass_1_1FragmentStore.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html',1,'cutlass']]], + 
['functor',['functor',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#afa888d993b86ed88950a9e5ab7edeb06',1,'cutlass::gemm::GemmEpilogueTraits::Params::functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a6c30bea1b2a1bd2e981025851d5b12d1',1,'cutlass::gemm::GemmEpilogue::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a7cdb30f17692e8fdb3dd4cf4c0b8e9ee',1,'cutlass::gemm::GemmEpilogueTraits::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a981134cf87d85aa28570a62d9e878b10',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Functor()']]] +]; diff --git a/docs/generated-html/search/all_7.html b/docs/generated-html/search/all_7.html new file mode 100644 index 0000000000..88acd94663 --- /dev/null +++ b/docs/generated-html/search/all_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_7.js b/docs/generated-html/search/all_7.js new file mode 100644 index 0000000000..4c4dee8255 --- /dev/null +++ b/docs/generated-html/search/all_7.js @@ -0,0 +1,85 @@ +var searchData= +[ + ['gcd',['gcd',['../namespacecutlass.html#a38481ebfe13bc199aa621ceecfa016b8',1,'cutlass']]], + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html',1,'cutlass::gemm::Gemm< GemmTraits_ >'],['../structcutlass_1_1gemm_1_1Gemm.html#a8bff0bd32aec05f8c1e282024be0bcfd',1,'cutlass::gemm::Gemm::Gemm()']]], + ['gemm_2eh',['gemm.h',['../gemm_8h.html',1,'']]], + ['gemm_5fepilogue_2eh',['gemm_epilogue.h',['../gemm__epilogue_8h.html',1,'']]], + ['gemm_5fepilogue_5ftraits_2eh',['gemm_epilogue_traits.h',['../gemm__epilogue__traits_8h.html',1,'']]], + ['gemm_5fglobal_5fstream_2eh',['gemm_global_stream.h',['../gemm__global__stream_8h.html',1,'']]], + ['gemm_5fglobal_5ftile_2eh',['gemm_global_tile.h',['../gemm__global__tile_8h.html',1,'']]], + ['gemm_5fkernel',['gemm_kernel',['../namespacecutlass_1_1gemm.html#ad9577c9086b0f7fd1202d7f8109e4439',1,'cutlass::gemm']]], + ['gemm_5foperand_2eh',['gemm_operand.h',['../gemm__operand_8h.html',1,'']]], + ['gemm_5fshared_5fstream_2eh',['gemm_shared_stream.h',['../gemm__shared__stream_8h.html',1,'']]], + ['gemm_5fshared_5ftile_2eh',['gemm_shared_tile.h',['../gemm__shared__tile_8h.html',1,'']]], + ['gemm_5ftraits_2eh',['gemm_traits.h',['../gemm__traits_8h.html',1,'']]], + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ 
>'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a4efe5d156abca056ef8b5334fb574dd5',1,'cutlass::gemm::GemmTraits::GemmConfig()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1597c776238f35bcb1acc0a8f8f9c118',1,'cutlass::gemm::HgemmTraitsHelper::GemmConfig()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#af10aebe7ca4e24cce435ac4cd60e7bac',1,'cutlass::gemm::IgemmTraitsHelper::GemmConfig()']]], + ['gemmconfig_3c_20double_2c_20double_2c_20double_2c_20double_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20double_2c_20double_2c_20double_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_202_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_202_2c_201_2c_202_2c_201_2c_202_20_3e',['GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20float_2c_20float_2c_20float_2c_20float_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20float_2c_20float_2c_20float_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_204_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_204_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + 
['gemmconfig_3c_20half_2c_20half_2c_20half_2c_20half_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20half_2c_20half_2c_20half_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_208_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_208_2c_202_2c_208_2c_202_2c_202_20_3e',['GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_204_2c_202_20_3e',['GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20scalard_5f_2c_20scalard_5f_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmdesc',['GemmDesc',['../structcutlass_1_1gemm_1_1GemmDesc.html',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html',1,'cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ 
>'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ab10147070c3a38fca75397f55dc51925',1,'cutlass::gemm::GemmEpilogue::GemmEpilogue()']]], + ['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a4a0f361b5c47d0ab5f3308cd3b3b6ef6',1,'cutlass::gemm::HgemmTraitsHelper::GemmEpilogueTraits()']]], + ['gemmepiloguetraits_3c_20gemmconfig_5f_3a_3aoutputtile_2c_20gemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraits_3c_20igemmconfig_5f_3a_3aoutputtile_2c_20igemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper',['GemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper_3c_20igemmconfig_5f_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a34cb153d311377388e7819296a84d07e',1,'cutlass::gemm::GemmGlobalIteratorAb::GemmGlobalIteratorAb()']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ 
>'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6dae81995ab94c0b7f28eeeeb84a6c8d',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a64f1df43acb37a1901f0b55becaa9557',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['gemmglobaltilecdtraits',['GemmGlobalTileCdTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits',['GemmGlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits_3c_20gemmoperand_3a_3akc_2c_20matrixlayout_3a_3akcolumnmajor_2c_20scalar_5f_2c_20tile_5f_2c_20threads_5f_2c_20kaccesssize_5f_20_3e',['GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmmultiplicandtraits',['GemmMultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html',1,'cutlass::gemm']]], + ['gemmoperand',['GemmOperand',['../structcutlass_1_1GemmOperand.html',1,'cutlass']]], + ['gemmoperandtraitsab',['GemmOperandTraitsAb',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html',1,'cutlass::gemm']]], + ['gemmsharedloadtileatraits',['GemmSharedLoadTileATraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtilebtraits',['GemmSharedLoadTileBTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtiledtraits',['GemmSharedLoadTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html',1,'cutlass::gemm']]], + 
['gemmsharedstoretileabtraits',['GemmSharedStoreTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretiledtraits',['GemmSharedStoreTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstorewithskewtileabtraits',['GemmSharedStoreWithSkewTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ >'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a5557c86a530f5d20a35d3fa620adf417',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ab9e10d54c81a359db0eba58a11b9a0cf',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperA()']]], + ['gemmtiletraitshelpera_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ 
>'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8768c2b03bea0c3601c47dde2bc7ca89',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a095505bfcea6791accd06bf4d37b9df8',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperB()']]], + ['gemmtiletraitshelperb_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtraits',['GemmTraits',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['gemmtraits_3c_20gemmconfig_5f_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20epilogue_5f_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20gemmconfig_5f_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreamb_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreamb_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20helper_5f_3a_3agemmconfig_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20helper_5f_3a_3aepilogue_2c_20identityblockswizzle_2c_20index_5f_2c_20helper_5f_3a_3aclearaccumulators_20_3e',['GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['get',['get',['../classcutlass_1_1PredicateVector_1_1Iterator.html#af035589126434bd2dbef4000cd864b8b',1,'cutlass::PredicateVector::Iterator::get()'],['../structcutlass_1_1ComputeOffsetFromShape.html#a3c6f60a59178ffb84899aa449bd51d38',1,'cutlass::ComputeOffsetFromShape::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html#a5198e838e3892245fe7b10884555ec93',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html#a11bf40abc57580db5ce4b0fd4c3e55ff',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides.html#af5e46bc2b325cb6952d2d68c8aca1409',1,'cutlass::ComputeOffsetFromStrides::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html#acdbb9c7cdf9fc054656614f72396434e',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html#a512a9d46f6bea9d85641d7263bcfee36',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > 
>::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides.html#a1744bfe277cbe0c642cce4a48c1dd9ad',1,'cutlass::ComputeThreadOffsetFromStrides::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html#a5d446b2663c01362361e09435a726996',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html#a6e621f5fae2ba29277fde46be1cede24',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a2e7c14b8a118f81c1df46ea5045e297b',1,'cutlass::platform::unique_ptr::get()']]], + ['get_5fcoord_5fdhw',['get_Coord_dhw',['../namespacecutlass.html#a4680709eeeb679ef0219938f85f7394e',1,'cutlass']]], + ['get_5fcoord_5fhw',['get_Coord_hw',['../namespacecutlass.html#a7d2ab683e29b47d245e183ad5aeb962e',1,'cutlass::get_Coord_hw(Coord< 3 > const &coord)'],['../namespacecutlass.html#a082e7a2e4acc2879468243f5732ccf0b',1,'cutlass::get_Coord_hw(Coord< 4 > const &coord)']]], + ['get_5fcoord_5fhwc',['get_Coord_hwc',['../namespacecutlass.html#a71f3e2a12b9e98be1fba082610fa9d4f',1,'cutlass']]], + ['get_5fdeleter',['get_deleter',['../classcutlass_1_1platform_1_1unique__ptr.html#a5b8d8ecafb4da336acd50e40cd42b6e0',1,'cutlass::platform::unique_ptr::get_deleter() noexcept'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa427ab4ea4f2336ac6db28d53a4c11ac',1,'cutlass::platform::unique_ptr::get_deleter() const noexcept']]], + ['getextent',['GetExtent',['../structcutlass_1_1gemm_1_1GetExtent.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3aka_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kA, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html',1,'cutlass::gemm']]], + 
['getextent_3c_20gemmoperand_3a_3akb_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kB, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['global',['global',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#a3c2980547310ec4307f3a5f9817dfc51',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['global_5fstream_5fa',['global_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a575bcff901d69ae3f46987222f23ab64',1,'cutlass::gemm::GemmTraits::Params']]], + ['global_5fstream_5fb',['global_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a46affe35cb16874de5a2b9777aedf596',1,'cutlass::gemm::GemmTraits::Params']]], + ['globalfragmentc',['GlobalFragmentC',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad8e5337f3d19437e9c4cafcfcc3e3d3e',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globalfragmentd',['GlobalFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a723cd69ee4d5c26579b36e02c531ea88',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globaliterator',['GlobalIterator',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a56d3f2606f9464ec57aa61aae378c642',1,'cutlass::gemm::HgemmSwizzle::GlobalIterator()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a880878914c25db44a1781725c24af514',1,'cutlass::gemm::IgemmSwizzle::GlobalIterator()']]], + ['globalloaditeratora',['GlobalLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a5687850f235d644a4820851880740d27',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ab8a3def34300afb5745453d0b33204aa',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ac7ee33e683e48511a1a220df6c9d4758',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorA()']]], + 
['globalloaditeratorb',['GlobalLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a362794738bc14b283a91558bcadbbfd5',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a95559f28cab076da723e4cb24351116e',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a3a6d816852cca926afa08103f754477b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorB()']]], + ['globalloaditeratorc',['GlobalLoadIteratorC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aecb5429363c7156ee3ad596fe250120a',1,'cutlass::gemm::GemmEpilogue::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a8409d84ee282a4d6953bd41149d8b9c2',1,'cutlass::gemm::GemmEpilogueTraits::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aeea13630bb281834b717f8d9d13a9319',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a24826f99d097eea0298e6be12a6327b9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadIteratorC()']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html',1,'cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a4dd11a75375b6b9d7b8dcbd4d402d8d6',1,'cutlass::gemm::GlobalLoadStream::GlobalLoadStream()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ab2961b4db0694cf128d55d38a98db575',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::GlobalLoadStream()']]], + 
['globalloadstreama',['GlobalLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a9cd6c3fddfb4315eb52b672900462c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a448c242880183e006b70d839d210a2ec',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a2aaece6093100c71c4d587994200e3bb',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a7fb1354154f303642da72e6fd157d846',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamA()']]], + ['globalloadstreamb',['GlobalLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac393b07e780629fc8254fc22cc6f815b',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aad467ed9a680b4d77acecb096799cd89',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abaf5f16ab0b215b406766ecadab29394',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a88e66ee760aea03687e7b3ccc6ea535b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamB()']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0fdc0f56d1352b5ad41fd4985edd3278',1,'cutlass::gemm::GlobalLoadStreamBase::GlobalLoadStreamBase()']]], + 
['globalloadtiletraits',['GlobalLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a94f00f94a88588522ca3f9f0197a5a9b',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aaa009025dcd6360ead1dc18005688821',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadTileTraits()']]], + ['globalstoreiteratord',['GlobalStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a1c766374d900535c944cf2a2de6925f4',1,'cutlass::gemm::GemmEpilogue::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aeef5745d149770c9f79e12f6d97ffce1',1,'cutlass::gemm::GemmEpilogueTraits::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a23be7b4b498c17f9235a2b4896f1bffb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad3e937c15bfac443b0e3b94d702f46b2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreIteratorD()']]], + ['globalstoretiletraits',['GlobalStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a16d7df2934c3c59d9b8f36f7a2137aee',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a16b06a1611dbd22adaa0c9ee5e1b15bd',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreTileTraits()']]], + ['globaltiletraits',['GlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#adc95f4a8617cdf28e5b5d7d2d1aefec2',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a8160a260acce2362e90d43bce733c69d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a5fee0ed52326c0685e8d8295e40ce064',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afbc41e7b98097b153fd27a48f073a877',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a36e082b2da22d17eeb73af6bd0632314',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1e6356bf5c87271ab9794fcc79edc145',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a738774d1eb79de7e29c372ddfd48258d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a24f38105e3c331c733cb672c3a9be588',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()']]], + 
['globaltransformera',['GlobalTransformerA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#af9a98d39d6959a9641f7c3c90df2f98e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a3fb86b6d3e353df6b752510d64c5e647',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a23bb732b7237bcabe3667408f288844d',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerA()']]], + ['globaltransformerb',['GlobalTransformerB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a437070ba4a214aee363315d6019e450c',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7b4de712868095200a338802c1fbb3de',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a600bcc571ea5e04a98663c134d4664b9',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerB()']]], + ['globaltransformerc',['GlobalTransformerC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a41edfd24b7dd2759f8b72ae8534182a9',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a051f25a4aa3ea71ff400582228adbdaa',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a0682b61d1a1a951026ff026bff9361bb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad0116b2e7b2ca1526246e2ff7e73fd2f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerC()']]], + 
['globaltransformerd',['GlobalTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a32f618ff19d984447fba7355d46a69a7',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a261e526c6a8e832bc483bf4e486cc9d7',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae96c5a3d58dc7a95543f8749f762ca43',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a880293ef6a48a0f4941c8f984c36f591',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerD()']]], + ['good',['good',['../classcutlass_1_1TensorRef.html#a0c049e523ee0fc98769ed8cd2d026780',1,'cutlass::TensorRef::good()'],['../classcutlass_1_1TensorView.html#a837881bc82704491accf54aad2b9def9',1,'cutlass::TensorView::good()']]], + ['greater',['greater',['../structcutlass_1_1platform_1_1greater.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/all_8.html b/docs/generated-html/search/all_8.html new file mode 100644 index 0000000000..b74d5fd807 --- /dev/null +++ b/docs/generated-html/search/all_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_8.js b/docs/generated-html/search/all_8.js new file mode 100644 index 0000000000..684d3e47c0 --- /dev/null +++ b/docs/generated-html/search/all_8.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['hgemm_5fglobal_5ftile_2eh',['hgemm_global_tile.h',['../hgemm__global__tile_8h.html',1,'']]], + ['hgemm_5fmultiply_5fadd_2eh',['hgemm_multiply_add.h',['../hgemm__multiply__add_8h.html',1,'']]], + ['hgemm_5fswizzle_2eh',['hgemm_swizzle.h',['../hgemm__swizzle_8h.html',1,'']]], + ['hgemm_5ftraits_2eh',['hgemm_traits.h',['../hgemm__traits_8h.html',1,'']]], + ['hgemmconfig',['HgemmConfig',['../structcutlass_1_1gemm_1_1HgemmConfig.html',1,'cutlass::gemm']]], + ['hgemmcrosswiseglobaltiletraits',['HgemmCrosswiseGlobalTileTraits',['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html',1,'cutlass::gemm']]], + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html',1,'cutlass::gemm::HgemmSwizzle< GlobalIterator_ >'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ac3c52e0fee9b37a3dfc39ca168a63d36',1,'cutlass::gemm::HgemmSwizzle::HgemmSwizzle()']]], + ['hgemmtiletraitshelpera',['HgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb',['HgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['hgemmtraits',['HgemmTraits',['../structcutlass_1_1gemm_1_1HgemmTraits.html',1,'cutlass::gemm']]], + ['hgemmtraitshelper',['HgemmTraitsHelper',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['hgemmtransformera',['HgemmTransformerA',['../structcutlass_1_1gemm_1_1HgemmTransformerA.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb',['HgemmTransformerB',['../structcutlass_1_1gemm_1_1HgemmTransformerB.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/all_9.html b/docs/generated-html/search/all_9.html new file mode 100644 index 0000000000..95e88dd25c --- /dev/null +++ b/docs/generated-html/search/all_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_9.js b/docs/generated-html/search/all_9.js new file mode 100644 index 0000000000..fe5b5cd3d9 --- /dev/null +++ b/docs/generated-html/search/all_9.js @@ -0,0 +1,107 @@ +var searchData= +[ + ['identity',['Identity',['../structcutlass_1_1Identity.html',1,'cutlass']]], + ['identity_5fblock_5fswizzle_2eh',['identity_block_swizzle.h',['../identity__block__swizzle_8h.html',1,'']]], + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html',1,'cutlass::gemm::IdentityBlockSwizzle'],['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#abfde9b316173b1c0b8622cf22ffb6d68',1,'cutlass::gemm::IdentityBlockSwizzle::IdentityBlockSwizzle()']]], + ['idx',['idx',['../structcutlass_1_1Coord.html#a50de265129f1db7bdf2f0aefbc6a46bc',1,'cutlass::Coord']]], + ['igemm_5fepilogue_2eh',['igemm_epilogue.h',['../igemm__epilogue_8h.html',1,'']]], + ['igemm_5fglobal_5ftile_2eh',['igemm_global_tile.h',['../igemm__global__tile_8h.html',1,'']]], + ['igemm_5fmultiply_5fadd_2eh',['igemm_multiply_add.h',['../igemm__multiply__add_8h.html',1,'']]], + ['igemm_5fswizzle_2eh',['igemm_swizzle.h',['../igemm__swizzle_8h.html',1,'']]], + ['igemm_5ftraits_2eh',['igemm_traits.h',['../igemm__traits_8h.html',1,'']]], + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmConfig.html',1,'cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5a52727bb9b5d5f8afa7d0384f564036',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::IgemmConfig()']]], + ['igemmconfig_3c_20outputtile_5f_2c_20int8_5ft_2c_20accumulatorsperthread_5f_20_3e',['IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >',['../structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html',1,'cutlass::gemm']]], + 
['igemmcontiguousglobaltiletraits',['IgemmContiguousGlobalTileTraits',['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html',1,'cutlass::gemm']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#ab7a51121d24250d6441ee538e6521dc2',1,'cutlass::gemm::IgemmEpilogue::IgemmEpilogue()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a49ac00bed1532707aacd3ff108c84623',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue()']]], + ['igemmepilogue_3c_20gemmepiloguetraits_5f_2c_20true_20_3e',['IgemmEpilogue< GemmEpilogueTraits_, true >',['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar',['IgemmEpilogueScalar',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar_3c_20int_20_3e',['IgemmEpilogueScalar< int >',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguetraits',['IgemmEpilogueTraits',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['igemmepiloguetraitshelper',['IgemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html',1,'cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#ac65f020e93584b1bd3cdb849ff625026',1,'cutlass::gemm::IgemmFloatToInt8Converter::IgemmFloatToInt8Converter()']]], + ['igemmgloballoadtransformer',['IgemmGlobalLoadTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html',1,'cutlass::gemm']]], + 
['igemmgloballoadtransformer_3c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_2c_20float_20_3e',['IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer',['IgemmGlobalStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer_3c_20float_2c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_20_3e',['IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html',1,'cutlass::gemm']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html',1,'cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a88a55a494d3a30d50477d50bf6a8804d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::IgemmInt8ToFloatConverter()']]], + ['igemmsharedstoretransformer',['IgemmSharedStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html',1,'cutlass::gemm::IgemmSwizzle< GlobalIterator_ >'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac041d287c966cf568599d7e462e81d5a',1,'cutlass::gemm::IgemmSwizzle::IgemmSwizzle()']]], + ['igemmtiletraitshelpera',['IgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['igemmtiletraitshelperb',['IgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['igemmtraits',['IgemmTraits',['../structcutlass_1_1gemm_1_1IgemmTraits.html',1,'cutlass::gemm']]], + ['igemmtraitshelper',['IgemmTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmtransformera',['IgemmTransformerA',['../structcutlass_1_1gemm_1_1IgemmTransformerA.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb',['IgemmTransformerB',['../structcutlass_1_1gemm_1_1IgemmTransformerB.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + 
['immediateoffsetstrides',['ImmediateOffsetStrides',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#abc47717230ddde3edc88d2770f6841bf',1,'cutlass::gemm::GemmGlobalTileTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a14e9713b0cd34af433c3cae9b283b54c',1,'cutlass::gemm::GemmGlobalTileCdTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a027bebceeda2287b40915ffd95d494a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a39414f484da7f993bc96d61c97273614',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a8e767b5e2fb95b0b02a0ea3e8ea58368',1,'cutlass::gemm::GemmSharedLoadTileATraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a5e4204b52ee081a37e824ca71c291c03',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ac585815d08290d9a5a9cdbd611ffdac4',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9cfb32f902593e7dc018ee802c3520b8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af53d49bad7060b87a2761fe8a82a7ddd',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ImmediateOffsetStrides()'],['../structcutlass_1_1TileIteratorBase.html#a561ceb1093b28b8dce67df0129b7b8b8',1,'cutlass::TileIteratorBase::ImmediateOffsetStrides()']]], + 
['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a8c2618ac16362a8362dcddeed71c41d4',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a857db0c999250248b104f17f13fe9bd8',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a1187258cd4068a627e73bee0302f1fc2',1,'cutlass::TileIteratorBase::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a9dea455aa86bb59517b4a4d0309e424b',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ab4b8150f19c9f8649d75c69ec0a76e1a',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a174ae7d8aa0664eaf1d6f63c5606baa0',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1TileLoadIterator.html#a91e13a7aad4b0acac002b6dd125abc37',1,'cutlass::TileLoadIterator::inc_advance()'],['../structcutlass_1_1TileStoreIterator.html#a1614b27755cf82c0e1f3e7852c5a4c75',1,'cutlass::TileStoreIterator::inc_advance()']]], + ['inc_5fc',['inc_c',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a12ead84ea9634e963d10c6df7b7792c9',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_c()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a44287250bf5631a490b514859fd101d1',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_c()']]], + 
['inc_5fd',['inc_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#af95fa1b5102176a0fa9b17713fd48150',1,'cutlass::TileIteratorBase::Params::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1e42503e5a54cdc01308e9030aebdd35',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad26ab8d8010c9a1d7f3b91f60940b460',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab1ebbe54e4315ac07daf260a88f41d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1TileLoadIterator.html#a0a93f37fd366a48c4ed6cc39aa850eb5',1,'cutlass::TileLoadIterator::inc_d()'],['../structcutlass_1_1TileStoreIterator.html#a74dffe1ddcc84935ab170117e939b7e3',1,'cutlass::TileStoreIterator::inc_d()']]], + ['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#aed94505e5a269d5f33499e71284104f5',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a6306f771718c0c05276e103f30f862b2',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#aea591d4278a8338ae8b50fa0b8f3a366',1,'cutlass::TileIteratorBase::Params::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aa24336597f4a3316d94df6ab0c20f714',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ae07fa10a53d44471a04275145201299e',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa9a733f35e9be67663c9c8f80b0034d4',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1TileLoadIterator.html#a228a95cf2c9c6089287984fcbf5cface',1,'cutlass::TileLoadIterator::inc_h()'],['../structcutlass_1_1TileStoreIterator.html#a3793f5d5846862f22f1de736e36ae7c1',1,'cutlass::TileStoreIterator::inc_h()']]], + 
['inc_5fstage',['inc_stage',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#acf22fd09aa537943c16b900d66f1ec6f',1,'cutlass::gemm::SharedLoadStream::inc_stage()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8851150a49e4a9c135279c8c9dfdc592',1,'cutlass::gemm::GemmTraits::SharedLoadStream::inc_stage()'],['../structcutlass_1_1TileLoadIterator.html#aeb3faf5e8f976f5a4d158ceb41a1cc64',1,'cutlass::TileLoadIterator::inc_stage()'],['../structcutlass_1_1TileStoreIterator.html#a187e0852ec4862f6d3cb6249bedc3bb3',1,'cutlass::TileStoreIterator::inc_stage()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ac6e81450a2d78555a6c2415dcc42b178',1,'cutlass::TileIteratorBase::Params::inc_w()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a622a4dd27162854ec96efea93cdd4380',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aec2d692967d9be5d42673dfde21f5427',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1TileLoadIterator.html#a49cf3ee608debebf451cdd8c2125d073',1,'cutlass::TileLoadIterator::inc_w()'],['../structcutlass_1_1TileStoreIterator.html#aa573a47a9ffc3e07239a09e2bc470cf1',1,'cutlass::TileStoreIterator::inc_w()']]], + 
['index',['Index',['../structcutlass_1_1gemm_1_1Gemm.html#a0aca711d07245f3071adeb1111fedd34',1,'cutlass::gemm::Gemm::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a07c93d583bfddd8f916fba6ef809832e',1,'cutlass::gemm::GemmEpilogue::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab430d05bd17efd60c28077c87b5ca331',1,'cutlass::gemm::GemmEpilogueTraits::Index()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6a6e38022606dd8d41cf7264fb059cc2',1,'cutlass::gemm::GlobalLoadStreamBase::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a7ff9cae930c8a6bb9c8ee6d81cb1953f',1,'cutlass::gemm::GemmGlobalIteratorAb::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56847e834b31b88544093c3df54d299f',1,'cutlass::gemm::GemmGlobalIteratorCd::Index()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae67227cecbe84f5c8497d9a7ff82b367',1,'cutlass::gemm::GemmTraits::Index()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a3f45216454a550a116935aede0bda3de',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Index()'],['../structcutlass_1_1TileIteratorBase.html#a44665808adfd69df0d26cec4b1840cc3',1,'cutlass::TileIteratorBase::Index()'],['../structcutlass_1_1TileLoadIterator.html#aaa83f05e0cb3204053c3ee1da036cd36',1,'cutlass::TileLoadIterator::Index()'],['../structcutlass_1_1TileStoreIterator.html#a5ac2280dfcac08cec17b8c0db1c4593e',1,'cutlass::TileStoreIterator::Index()']]], + 
['initialize',['initialize',['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html#ac00c9d78a187d9c7d53399f971c0e129',1,'cutlass::gemm::Gemm::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a3e9d0fd2989fea776b0cab0e0f2813ce',1,'cutlass::gemm::GemmEpilogueTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a7c7e448384156c801ed362359a1a6a40',1,'cutlass::gemm::GlobalLoadStreamBase::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html#a73091e07b6d4c99f6e0319fbf6bd1709',1,'cutlass::gemm::GemmGlobalIteratorAb::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#af5a496f1b6a46ea6a9894512029add6a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#adb66103b905b35a1594c6f0bab65758a',1,'cutlass::gemm::SharedLoadStream::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a8e4d277325bb5e56c718a2298b60d3cf',1,'cutlass::gemm::SharedLoadStream::initialize()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a40023f0ffdd8bee4ccbcaac28222e983',1,'cutlass::gemm::GemmTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a4946e45e10661307f562b27bad5cb72d',1,'cutlass::gemm::LinearScaling::Params::initialize()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#ad6b65c5f3ed7cd9e7ffeb684cbf30d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad2631ffcc963638aa5b016c66a2e2c55',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a3ba93370bd4b2ede4bd4eb97ac0881be',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, 
Index _stride_w)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#af496afebb8983e5d346c681334955224',1,'cutlass::TileIteratorBase::Params::initialize()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aeeea0f8bdee876553a4908b9b7cbaf76',1,'cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#afd9e82df76ad35fe883b7834457242b2',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aa3922946bb0da0c0040dec44aa389ec1',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aebaecd0f971245ffc5a50fe5f7a9b4e8',1,'cutlass::TileLoadIterator::Params::initialize()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a71f5238a712f7b2f377fb58938ac829b',1,'cutlass::TileStoreIterator::Params::initialize(SharedStorage &storage)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af0d26a2df2a1a5ba3c3169b736bd5d43',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#ac1cfe92f1543ba445fa10f1859a0db98',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af884f720d36aa82e7f972932686ae986',1,'cutlass::TileStoreIterator::Params::initialize()']]], + 
['initialize_5fpredicates',['initialize_predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab9375d9e779dcda79a5cd561bb3762ff',1,'cutlass::gemm::GemmGlobalIteratorAb::initialize_predicates()'],['../structcutlass_1_1TileIteratorBase.html#a78b6c0d6a1a96dd55a34bc302ecb07d7',1,'cutlass::TileIteratorBase::initialize_predicates()'],['../structcutlass_1_1TileLoadIterator.html#a8291a51bf96f86bc77d0e3453345dbd5',1,'cutlass::TileLoadIterator::initialize_predicates()'],['../structcutlass_1_1TileStoreIterator.html#af92ba20db048a9ec96976a1673f0f7c2',1,'cutlass::TileStoreIterator::initialize_predicates()']]], + ['inputfragment',['InputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#ac7906301019c3e6d60985c3851f1e95e',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::InputFragment()'],['../structcutlass_1_1Copy.html#aed254bbc1ad94ed9d335ab02f199ceb1',1,'cutlass::Copy::InputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ab5fab63d83eb0444c08bda16491d2627',1,'cutlass::gemm::HgemmSwizzle::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#aa9a4b05f9fc28b80a4ae4aabb2ce1e8c',1,'cutlass::gemm::IgemmFloatToInt8Converter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a702ca51abc077355a2d7343976a0cfdb',1,'cutlass::gemm::IgemmInt8ToFloatConverter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a24a0bd5a9251ba5204b35eb4c4ac7727',1,'cutlass::gemm::IgemmSwizzle::InputFragment()']]], + ['instructionshape',['InstructionShape',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#ac93ba536992debeae86087e638167a13',1,'cutlass::gemm::FragmentMultiplyAdd::InstructionShape()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ab16a3d8adda89cc4f9765116ea75a4b7',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::InstructionShape()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a3a57d05f50932d718538f0d1ededa95b',1,'cutlass::gemm::GemmConfig::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa56cdefa659af5ce4efd493b94bafdfd',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad73372a37315b0c17a8db21e40a78574',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ac6381210d447fda9b0e9a028d167f22b',1,'cutlass::gemm::ThreadMultiplyAdd::InstructionShape()']]], + ['integral_5fconstant',['integral_constant',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_20v_20_3e',['integral_constant< bool, V >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5farithmetic_3c_20t_20_3e_3a_3avalue_7c_7cis_5fvoid_3c_20t_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20nullptr_5ft_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fbase_5fof_5fhelper_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_7c_7c_28is_5fsame_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, 
remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5ffundamental_3c_20t_20_3e_3a_3avalue_7c_7cis_5fpointer_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fintegral_3c_20t_20_3e_3a_3avalue_7c_7cis_5ffloating_5fpoint_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fsame_3c_20float_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20double_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28n_20_26_28n_20_2d_201_29_29_3d_3d0_20_3e',['integral_constant< bool,(N &(N - 1))==0 >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['is_5farithmetic',['is_arithmetic',['../structcutlass_1_1platform_1_1is__arithmetic.html',1,'cutlass::platform']]], + ['is_5fbase_5fof',['is_base_of',['../structcutlass_1_1platform_1_1is__base__of.html',1,'cutlass::platform']]], + ['is_5fbase_5fof_5fhelper',['is_base_of_helper',['../structcutlass_1_1platform_1_1is__base__of__helper.html',1,'cutlass::platform']]], + ['is_5ffloating_5fpoint',['is_floating_point',['../structcutlass_1_1platform_1_1is__floating__point.html',1,'cutlass::platform']]], + 
['is_5ffundamental',['is_fundamental',['../structcutlass_1_1platform_1_1is__fundamental.html',1,'cutlass::platform']]], + ['is_5fintegral',['is_integral',['../structcutlass_1_1platform_1_1is__integral.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20char_20_3e',['is_integral< char >',['../structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20t_20_3e',['is_integral< const T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20volatile_20t_20_3e',['is_integral< const volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20int_20_3e',['is_integral< int >',['../structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20_3e',['is_integral< long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20long_20_3e',['is_integral< long long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20short_20_3e',['is_integral< short >',['../structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20signed_20char_20_3e',['is_integral< signed char >',['../structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20char_20_3e',['is_integral< unsigned char >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20int_20_3e',['is_integral< unsigned int >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20_3e',['is_integral< unsigned long 
>',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20long_20_3e',['is_integral< unsigned long long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20short_20_3e',['is_integral< unsigned short >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20volatile_20t_20_3e',['is_integral< volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fpointer',['is_pointer',['../structcutlass_1_1platform_1_1is__pointer.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper',['is_pointer_helper',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_pointer_helper< remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20t_20_2a_20_3e',['is_pointer_helper< T * >',['../structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html',1,'cutlass::platform']]], + ['is_5fpow2',['is_pow2',['../structcutlass_1_1is__pow2.html',1,'cutlass']]], + ['is_5fsame',['is_same',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20a_2c_20a_20_3e',['is_same< A, A >',['../structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20void_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_same< void, remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5ftrivially_5fcopyable',['is_trivially_copyable',['../structcutlass_1_1platform_1_1is__trivially__copyable.html',1,'cutlass::platform']]], + 
['is_5fvoid',['is_void',['../structcutlass_1_1platform_1_1is__void.html',1,'cutlass::platform']]], + ['is_5fvolatile',['is_volatile',['../structcutlass_1_1platform_1_1is__volatile.html',1,'cutlass::platform']]], + ['is_5fvolatile_3c_20volatile_20t_20_3e',['is_volatile< volatile T >',['../structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fzero',['is_zero',['../structcutlass_1_1PredicateVector.html#a1c4fe2bec906cd7937428ed6561ac79a',1,'cutlass::PredicateVector::is_zero()'],['../namespacecutlass_1_1gemm.html#a3e30ae89e6f7501725028144cd2d88cb',1,'cutlass::gemm::is_zero(T x)'],['../namespacecutlass_1_1gemm.html#a4a12fcfae60f26efa47bf0a79483d8ac',1,'cutlass::gemm::is_zero(half x)']]], + ['isvector',['IsVector',['../structcutlass_1_1VectorTraits.html#abf96ea5dfd3212d388cb91e48cc0e6a2',1,'cutlass::VectorTraits::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aead181209c756f25ab5870682670bb99',1,'cutlass::VectorTraits< Vector< T, Lanes > >::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a893488718d8437970c1b4ed4f4056620',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector()']]], + 
['iterations',['Iterations',['../structcutlass_1_1FragmentIterator.html#a4324ae522c6463e66a64f05d2e58b5f0',1,'cutlass::FragmentIterator::Iterations()'],['../structcutlass_1_1FragmentConstIterator.html#a527100e34ed700787b1419157710dbb2',1,'cutlass::FragmentConstIterator::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a8e3c978da6ed56239783bf4db0a936ae',1,'cutlass::gemm::GemmEpilogue::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab00969bdda930eeb7b82985c476adf7d',1,'cutlass::gemm::GemmEpilogueTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ad7b23352072b1509d3383ee775756d2a',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aaf6410f99d7f995792d0ac34efd3a82f',1,'cutlass::gemm::GemmGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a72eebc18d31900db57fa77508016f64a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a6125e052e47296c3ef53c8a149ffd31b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a025445699c5c86237d8c3e48f01081ea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#ae96e490d38ade6db4d853fb6c8f3378b',1,'cutlass::gemm::GemmSharedLoadTileATraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a27bc06b72a94e34d5da6fbfb950459b5',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a6bacc866485330f80596f634e6d14336',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a81ca35e0c5d9553d1dccc981cbd89d47',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1
HgemmCrosswiseGlobalTileTraits.html#aa9b46937bea47d071d277aa212dd610b',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a0b9b2b7838cb13a61a16501a2662fa51',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a9fb4b56091d4458ebd82130bc3951e5b',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Iterations()'],['../structcutlass_1_1PredicateTileAdapter.html#a1f2d52eec9f488c2a53c4d62af824450',1,'cutlass::PredicateTileAdapter::Iterations()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a5e461e0eb376de60605a6ab5fdc38058',1,'cutlass::ConstPredicateTileAdapter::Iterations()'],['../structcutlass_1_1TileTraits.html#af7ae2fdb4c8f1702169cc7d437d2b469',1,'cutlass::TileTraits::Iterations()'],['../structcutlass_1_1TileIteratorBase.html#a352ed0773b37f03bf68e4b6cf9899474',1,'cutlass::TileIteratorBase::Iterations()'],['../structcutlass_1_1TileLoadIterator.html#a9720b1e4a10c2d5aa85f9a9c66a31bbf',1,'cutlass::TileLoadIterator::Iterations()'],['../structcutlass_1_1TileStoreIterator.html#a552a67fb03c28e985d143f6193f88308',1,'cutlass::TileStoreIterator::Iterations()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a03a32694da75bb95422c6b550e3324e2',1,'cutlass::TileTraitsStrideMajor::Iterations()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a425a20b642ae8736c12626b2de9b8b82',1,'cutlass::TileTraitsContiguousMajor::Iterations()'],['../structcutlass_1_1TileTraitsWarpRake.html#a410e44aa83f2179152a48f7aceb05323',1,'cutlass::TileTraitsWarpRake::Iterations()']]], + ['iterationsstrides',['IterationsStrides',['../structcutlass_1_1FragmentConstIterator.html#ab683796885f3bae3765efd96883f311b',1,'cutlass::FragmentConstIterator']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ 
>::Iterator'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6925270c4ad157554ab155cddc7b46e6',1,'cutlass::gemm::SharedLoadStream::Iterator()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a91b7d25cbd64e696ef23c87671f0b077',1,'cutlass::PredicateVector::Iterator::Iterator(Iterator const &it)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a83c2f584bd061f0b9b6b2a6cddf5b038',1,'cutlass::PredicateVector::Iterator::Iterator(PredicateVector &_vec, int _start=0)'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#ae59f871c06a0ac7b9224f0de923082d7',1,'cutlass::gemm::SharedLoadStream::Params::iterator()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a54481a42d4125e3693a086269d9a7b10',1,'cutlass::gemm::SharedLoadStream::iterator()']]], + ['iterator_5faccess_2eh',['iterator_access.h',['../iterator__access_8h.html',1,'']]], + ['iterator_5fc',['iterator_c',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a7350ceefcd09a9e3662ca30b780cc2ce',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fd',['iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a987c179a7e73c2572fe8aef3255668f7',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fload',['iterator_load',['../namespacecutlass.html#a45dd7add04736cb5c3e69991d2f210be',1,'cutlass::iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a50f08aa93d7fe6825599d17e3c977031',1,'cutlass::iterator_load(InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#aca491136bdb966638a7ae57c47f86d1e',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#af25d56f7391322d9a3b9aa3c507f90dc',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + 
['iterator_5fload_5fpost_5fincrement',['iterator_load_post_increment',['../namespacecutlass.html#a3965068d8a4fdfe5e05782930fb4fe6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#af5abe551df7461eab66aa43907063d6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#afb8e7a4e611e8b5ae7ca19d02f791d37',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore',['iterator_store',['../namespacecutlass.html#a0cb5bdf7bef498705c51a9cdcbef71f9',1,'cutlass::iterator_store(OutputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a88dce4b124a294cc123f7cf5fd2d6472',1,'cutlass::iterator_store(OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#a410ed4d45ccafc2db842967740b6211f',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#ad804b804ac19360b293046f9cbfd8dd5',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore_5fpost_5fincrement',['iterator_store_post_increment',['../namespacecutlass.html#a5bf15cbf4cf4649d895fcbc2edf6a2de',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#ab8efb0edefca7a59acc5a14b7311130c',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index 
offset=0)'],['../namespacecutlass.html#a96fdb65e922f6a3d46aa5de9ea78d460',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iteratoradvance',['IteratorAdvance',['../structcutlass_1_1IteratorAdvance.html',1,'cutlass']]], + ['iteratorfragment',['IteratorFragment',['../structcutlass_1_1IteratorFragment.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_a.html b/docs/generated-html/search/all_a.html new file mode 100644 index 0000000000..3148a8e513 --- /dev/null +++ b/docs/generated-html/search/all_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_a.js b/docs/generated-html/search/all_a.js new file mode 100644 index 0000000000..fec5dfcc8b --- /dev/null +++ b/docs/generated-html/search/all_a.js @@ -0,0 +1,76 @@ +var searchData= +[ + ['k',['k',['../structcutlass_1_1gemm_1_1GemmDesc.html#ac789a7e5d2db65d006f1e8e3df542a6f',1,'cutlass::gemm::GemmDesc::k()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aae3a008b39f9678a03192f6ff54152d8',1,'cutlass::gemm::GemmTraits::Params::k()']]], + ['ka',['kA',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0cac2b9fe9e3679a059d1a6c946b2a2c31a',1,'cutlass::GemmOperand']]], + ['kaccesssize',['kAccessSize',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aa001e09b246fdd8259cbda6a500cad5f',1,'cutlass::gemm::GemmGlobalTileTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae852c89da0455025c0c41af258e47047',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a846e6d8d06be0ba6fa41b1431c8ec061',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0a33d4289ed45e988d560b5f73ac997e',1,'cutlass::gemm::GemmSharedLoadTileATraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aa41cc5dc82fe08457d103545f8f63081',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9521c4017e227b2511891a7fb18513e1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8d308d593b59624abe3e228d588be61d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kAccessSize()'],['../structcutlass_1_1TileIteratorBase.html#aef07ba456ea016092d7d2446751b76a3',1,'cutlass::TileIteratorBase::kAccessSize()']]], + 
['kaccumulatorsperldsa',['kAccumulatorsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#abbdd356f280099269867e614684645cf',1,'cutlass::gemm::GemmConfig']]], + ['kaccumulatorsperldsb',['kAccumulatorsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a9dd092bca2f1f2c039f367b23bafa9c1',1,'cutlass::gemm::GemmConfig']]], + ['kadvance',['kAdvance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a8c1e871f17685b16a7a41fcc888f0125',1,'cutlass::gemm::GemmGlobalIteratorAb::kAdvance()'],['../structcutlass_1_1TileIteratorBase.html#ac1a64e974dcd69c3a86a31db6cbff421',1,'cutlass::TileIteratorBase::kAdvance()'],['../structcutlass_1_1TileLoadIterator.html#a69d2f21c8188fb3229af8c2dbe0a23b6',1,'cutlass::TileLoadIterator::kAdvance()'],['../structcutlass_1_1TileStoreIterator.html#a8059c57030df99b73309e9210ec5f624',1,'cutlass::TileStoreIterator::kAdvance()']]], + ['kb',['kB',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0caad0876342d150cef7da6ae149d5e99f9',1,'cutlass::GemmOperand']]], + ['kbytes',['kBytes',['../structcutlass_1_1PredicateVector.html#ab870e074b33c598f69fe11e104615c5a',1,'cutlass::PredicateVector']]], + ['kc',['kC',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca7598e104da2001a76ec344f1c1b9c6dc',1,'cutlass::GemmOperand::kC()'],['../structcutlass_1_1Shape.html#a3f2433fd6401dd28f1130499f9fd340c',1,'cutlass::Shape::kC()']]], + ['kcolumnmajor',['kColumnMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2bac15988acba79c11072d38b295f163a2b',1,'cutlass::MatrixLayout']]], + ['kcount',['kCount',['../structcutlass_1_1ShapeCount.html#a8d25b48b3294b5563f89c62a6e6d00e5',1,'cutlass::ShapeCount']]], + 
['kd',['kD',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca49eef82461e44c96462f9c4dbaab71fe',1,'cutlass::GemmOperand::kD()'],['../structcutlass_1_1Shape.html#a19086a5567d6c710ec853e35a7f29c25',1,'cutlass::Shape::kD()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa56ecb02f4ed3bd7ae4a9c971805ee8c5',1,'cutlass::IteratorAdvance::kD()']]], + ['kdhw',['kDhw',['../structcutlass_1_1ShapeCount.html#af7d7ccd42de2c49fe57f03cf0e657fe8',1,'cutlass::ShapeCount']]], + ['kdhwc',['kDhwc',['../structcutlass_1_1ShapeCount.html#a5a274564d6b8607a0be621b2664fba18',1,'cutlass::ShapeCount']]], + ['kelements',['kElements',['../structcutlass_1_1Fragment.html#a2b9a64391d00ef23dd8d456c2337fa60',1,'cutlass::Fragment']]], + ['kelementsperaccess',['kElementsPerAccess',['../structcutlass_1_1FragmentIterator.html#ad2c43e30e78e8799df7cb02ac08cee9a',1,'cutlass::FragmentIterator::kElementsPerAccess()'],['../structcutlass_1_1FragmentConstIterator.html#a004fabc9caa6924f3fb4badcbb19e88f',1,'cutlass::FragmentConstIterator::kElementsPerAccess()']]], + ['kextent',['kExtent',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html#a881f84951bc9e47ab2be9ef3f2c1e423',1,'cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent()'],['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html#a82ff9b447e4a58164b5f7d53d2602930',1,'cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent()']]], + ['kfragmentsize',['kFragmentSize',['../structcutlass_1_1TileIteratorBase.html#a4e0b2bc06bb8f52313e4d8c51ab30ff2',1,'cutlass::TileIteratorBase']]], + ['kgeneric',['kGeneric',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca21a44c0b78017acea0d1ffe223e5ca38',1,'cutlass::MemorySpace']]], + ['kglobal',['kGlobal',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03cac4bd4070cc396d698beb7ca2e3bbff37',1,'cutlass::MemorySpace']]], + 
['kh',['kH',['../structcutlass_1_1Shape.html#a3a20d9062bba613c160bb2cd14f80a5e',1,'cutlass::Shape::kH()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaacfe756fca665eb1bbf389850915c1b81',1,'cutlass::IteratorAdvance::kH()']]], + ['khw',['kHw',['../structcutlass_1_1ShapeCount.html#afc957be69eb78e4849ba8ab3cc66583f',1,'cutlass::ShapeCount']]], + ['khwc',['kHwc',['../structcutlass_1_1ShapeCount.html#a75324e2c9d31a0787343fc994586b742',1,'cutlass::ShapeCount']]], + ['kind',['Kind',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375',1,'cutlass::Identity::Kind()'],['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03c',1,'cutlass::MemorySpace::Kind()'],['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2b',1,'cutlass::MatrixLayout::Kind()'],['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0c',1,'cutlass::GemmOperand::Kind()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738dda',1,'cutlass::IteratorAdvance::Kind()'],['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80',1,'cutlass::IteratorFragment::Kind()']]], + ['kint8output',['kInt8Output',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html#a8609af98d1e43cd25688bae6f33feed4',1,'cutlass::gemm::IgemmEpilogueTraits']]], + ['kiterationsd',['kIterationsD',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8663311646210b690bb0c2a1012e82f0',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsh',['kIterationsH',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a3b1a461c1dfbcd3817ab2d57bd0da9f1',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsinhperwarp',['kIterationsInHPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4b8d66df02ba1653aa6d1f23b967f237',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + 
['kiteratorfragment',['kIteratorFragment',['../structcutlass_1_1TileIteratorBase.html#a38c8ec1e9d0117172981b4c7dd4bf3be',1,'cutlass::TileIteratorBase::kIteratorFragment()'],['../structcutlass_1_1TileLoadIterator.html#aba1d75a0cd5f11dee2aecf89b2b13d98',1,'cutlass::TileLoadIterator::kIteratorFragment()'],['../structcutlass_1_1TileStoreIterator.html#a94c0567316118abfb84fc28560a5a46a',1,'cutlass::TileStoreIterator::kIteratorFragment()']]], + ['kkstrided',['kKstrided',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a1984c9ef6abfd029acbc3f702593ab85',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['klanes',['kLanes',['../unioncutlass_1_1Vector.html#a824f9ab976c8e7f035236af03e5ae839a605c5e987bc7b08d743f29a6524abb27',1,'cutlass::Vector::kLanes()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#aa70d2fd36f00b63d321c1f7b6d6c3024ad242b575673ca1bf9cf311e58a966392',1,'cutlass::Vector< half, kLanes_ >::kLanes()'],['../structcutlass_1_1VectorTraits.html#a052e1e5963a9e04482b16cb881d1eaf8',1,'cutlass::VectorTraits::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aca745b59c6c21292f119943e5a480f39',1,'cutlass::VectorTraits< Vector< T, Lanes > >::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a43ac200035052a2c352c8c4b84aac73c',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes()']]], + 
['klayout',['kLayout',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a807cffc6f69f8d30a2fc94cf49fb904c',1,'cutlass::gemm::GlobalLoadStreamBase::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a74bc07cb021a73513ab2fbacd572be90',1,'cutlass::gemm::GemmGlobalTileTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afe016e0c6234075a8d69ba7341555ece',1,'cutlass::gemm::GemmGlobalIteratorAb::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a27b88818f5b094372bf2c6e090c9148a',1,'cutlass::gemm::GemmGlobalIteratorCd::kLayout()'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a19076e58e60d296da74cf504e2a473fd',1,'cutlass::gemm::GemmMultiplicandTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ad2010686bceb21aec9a1924ae379edc1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aedd49525e2c849baecf88cdfd9e3515c',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#afbd350793888a7e7b299548dca854c13',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a31fa28168811e2d04fbd74029df785ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ae0f176733ba9dee0cce45435ac5d53ba',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::kLayout()']]], + ['klayouta',['kLayoutA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac5bb5931a707ed7672f69267753ba41b',1,'cutlass::gemm::GemmTraits']]], 
+ ['klayoutb',['kLayoutB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a078e8d9cfa1b182e1b96a2cc8c54b684',1,'cutlass::gemm::GemmTraits']]], + ['kmemoryspace',['kMemorySpace',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#af219ece6e66e2866169e06e15cc4472d',1,'cutlass::gemm::GemmGlobalTileTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a59c981aa720f983b846bed7c3e4a7cab',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ae5a07814b9cfe9a64f69bac0f0772f20',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4456e4c8048bfb378e5b80833a0d19e5',1,'cutlass::gemm::GemmSharedLoadTileATraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7007093a4abf79a0b4bfb3fc85a02620',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a8914bc5154f21fa5fd182b0009c44c39',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#afb4687520eff9c6a21c35a5e04f69de8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kMemorySpace()'],['../structcutlass_1_1TileIteratorBase.html#a871c9b82109eab432c5a1d465643bf97',1,'cutlass::TileIteratorBase::kMemorySpace()'],['../structcutlass_1_1TileLoadIterator.html#ac21bd78b31c99c826f0eddb5aa033bf1',1,'cutlass::TileLoadIterator::kMemorySpace()'],['../structcutlass_1_1TileStoreIterator.html#adaebec9eacf767f63f048033de73ea5b',1,'cutlass::TileStoreIterator::kMemorySpace()']]], + 
['koperand',['kOperand',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#ae0bca976b7cfba8561db4cccc16e99e1',1,'cutlass::gemm::GemmGlobalTileTraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af511bba9fc2125516eb1442b1c88d851',1,'cutlass::gemm::GemmSharedLoadTileATraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afd4881aae69c8041d3931982d85f44e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kOperand()']]], + ['kpredicates',['kPredicates',['../structcutlass_1_1PredicateVector.html#afff3a2142d9853606d6ad7c3a459f492',1,'cutlass::PredicateVector']]], + ['kpredicatesperbyte',['kPredicatesPerByte',['../structcutlass_1_1PredicateVector.html#a1387c4a964f971ed4611d750a09ec0b5',1,'cutlass::PredicateVector']]], + ['kpredicatestart',['kPredicateStart',['../structcutlass_1_1PredicateVector.html#acf848dce84c01453ab8a2d00c8d4f86e',1,'cutlass::PredicateVector']]], + ['krequiresloadfence',['kRequiresLoadFence',['../structcutlass_1_1TileLoadIterator.html#a1f3601c595f12e7083919ece9b1ec84eaee9d9d6cea8079c32c9383bde45161fc',1,'cutlass::TileLoadIterator']]], + ['krowmajor',['kRowMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2ba6a287c17f9f5bf53528ae68296beeedb',1,'cutlass::MatrixLayout']]], + ['kscalar',['kScalar',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80aeca44a186befa21ccae44eb4dc7b6954',1,'cutlass::IteratorFragment']]], + ['kscalarsin4b',['kScalarsIn4B',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad77b9084720ad7378e033e54bfb74ce7',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a774a052f0f98f50e46dda933c81badd5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B()']]], + 
['kscalarsperldga',['kScalarsPerLdgA',['../structcutlass_1_1gemm_1_1GemmConfig.html#a2e0a043c5d4d7959ec1a2214c3ac39ac',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgb',['kScalarsPerLdgB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a849b21fed39aaac1cdd546334739be97',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgc',['kScalarsPerLdgC',['../structcutlass_1_1gemm_1_1GemmConfig.html#aad47c635a73e83bd4b19494864832d31',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsa',['kScalarsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#aa1b75484138923a52b32888fef608d9b',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsb',['kScalarsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a86470d3a44e2b50ee31ec3c9f79927ef',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsd',['kScalarsPerLdsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#adaf2ee5b8e6f7bdb9939cd45a186ca56',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperrow',['kScalarsPerRow',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af1c981ec89a9cabaf5d34231d51a029c',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerRow()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#aa3e378cabce9ed7f199c179c15a12ca4',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerRow()']]], + ['kscalarsperstgd',['kScalarsPerStgD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3633083f4f778215543e376c092745d7',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperstsa',['kScalarsPerStsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#accc95abc55880abdab92253367b4b186',1,'cutlass::gemm::GemmConfig::kScalarsPerStsA()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ae396f7301f934c179e054f68f0420edf',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA()']]], + 
['kscalarsperstsb',['kScalarsPerStsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#ac0c8c027e3ede14b62d7c7d519551f21',1,'cutlass::gemm::GemmConfig::kScalarsPerStsB()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a47d99d98c783cf1d317698bd465ffa9a',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB()']]], + ['kscalarsperstsd',['kScalarsPerStsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3087cdd38e2c65ad0dffdd0587d2cce0',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperthread',['kScalarsPerThread',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ae0b53d76096f9d34df6e16280565c7b1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerThread()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#abb5fdb164b09c8f74f92278f3d68b95f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerThread()']]], + ['kshared',['kShared',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca2804339b2be64ff68ae3042073aaa7cc',1,'cutlass::MemorySpace']]], + 
['kskew',['kSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ace14ca9ad11e2cdafcd4a4b63c0df591',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aba6decf87d770becaadd610d9fc27491',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaffe67e519e919bf561142e05da6e6c8',1,'cutlass::gemm::GemmSharedLoadTileATraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac9cd90ecd02809060a2fe6e2da4210f9',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a48baee6541e6359753f1bae5bd864029',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a7e9ce187e12575f0ecd39b2bfe13dddf',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kSkew()']]], + ['kstages',['kStages',['../structcutlass_1_1gemm_1_1GemmConfig.html#a221949c289057e39d439ce03a5b01c52',1,'cutlass::gemm::GemmConfig']]], + ['kstrideh',['kStrideH',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a87918f4d67a9c1e19dcd3c6bfc243e97',1,'cutlass::gemm::GemmGlobalTileCdTraits']]], + 
['kthreads',['kThreads',['../structcutlass_1_1gemm_1_1Gemm.html#a41239809be4ebc730dd8ff28c9efc58b',1,'cutlass::gemm::Gemm::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a05039ba8b7d9890903064b1a834dcd3e',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8325bc9d56155ecb6f2ddbd56f4ed23d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a0b2be601de08848afc4418adb97255bf',1,'cutlass::gemm::GemmConfig::kThreads()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a2b6ad449269a178018f02b8cc64ddb85',1,'cutlass::TileTraitsStrideMajor::kThreads()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a53d10552356855bf7379632e72bbe0c9',1,'cutlass::TileTraitsContiguousMajor::kThreads()'],['../structcutlass_1_1TileTraitsWarpRake.html#a11d943e15e397cbc5233b09071dff642',1,'cutlass::TileTraitsWarpRake::kThreads()'],['../structcutlass_1_1TileTraitsStandard.html#a9cbcbe09aa6e9465b63dd22d59435af1',1,'cutlass::TileTraitsStandard::kThreads()']]], + ['kthreadsperwarp',['kThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4246185b8279f245ef5d0650c1eec14f',1,'cutlass::gemm::GemmSharedLoadTileATraits::kThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a049b0bcdf8c5318ee84edeb1e42eaf78',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kThreadsPerWarp()']]], + ['kusage',['kUsage',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a962ffde3b3db78792b67dd1f57ab0a05',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['kvalue',['kValue',['../structcutlass_1_1Extent.html#a2cb62986b9a7c168bf79b083f33c4bad',1,'cutlass::Extent::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a10f7184a9a50de0268efa45dab5dc304',1,'cutlass::Extent< Vector< T, Lanes > 
>::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a87917a6dfbb1662416c4ea4831669aaf',1,'cutlass::Extent< Vector< T, Lanes > const >::kValue()']]], + ['kvectorsize',['kVectorSize',['../unioncutlass_1_1Vector.html#abf0c16b6f9cb8439835ebdb271d58763afaf4b62c6bcafbf961c5570364a0316e',1,'cutlass::Vector::kVectorSize()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#adc4140a7e40be1e4f81c78a657c7ba73abfbb3cf98db2f8af7150efb91cac4e79',1,'cutlass::Vector< half, kLanes_ >::kVectorSize()']]], + ['kw',['kW',['../structcutlass_1_1Shape.html#a78836a20250ff24c25a6622ad818b421',1,'cutlass::Shape::kW()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa567e61af8a3401d302f3a3ab26418df0',1,'cutlass::IteratorAdvance::kW()']]], + ['kwarpcount',['kWarpCount',['../structcutlass_1_1TileTraitsWarpRake.html#a7a03abe44862077351b0a0a2818d214d',1,'cutlass::TileTraitsWarpRake::kWarpCount()'],['../structcutlass_1_1TileTraitsStandard.html#a1e8f90991e179d13971b84494c989d25',1,'cutlass::TileTraitsStandard::kWarpCount()']]], + ['kwarps',['kWarps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af78a275086a297bd93aed920f57a17be',1,'cutlass::gemm::GemmSharedLoadTileATraits::kWarps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a8b8d6a26a29d5477f526d9ce8c27e3e2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kWarps()']]], + ['kwarpscontiguous',['kWarpsContiguous',['../structcutlass_1_1TileTraitsWarpRake.html#aede0832e95df911b1e6e3f1cc9e593ce',1,'cutlass::TileTraitsWarpRake']]], + ['kwarpsize',['kWarpSize',['../structcutlass_1_1gemm_1_1GemmConfig.html#a677d6a1711cc756b817095b7437cce0e',1,'cutlass::gemm::GemmConfig::kWarpSize()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad25fb7c1b5dc8c5828a69e5a468f490b',1,'cutlass::TileTraitsWarpRake::kWarpSize()'],['../structcutlass_1_1TileTraitsStandard.html#ae9f40eb177c440f01adcc2fe9ca7ec10',1,'cutlass::TileTraitsStandard::kWarpSize()']]], + 
['kwarpsstrided',['kWarpsStrided',['../structcutlass_1_1TileTraitsWarpRake.html#a8b1d3fe590f426ce11d597bb98c51bd4',1,'cutlass::TileTraitsWarpRake']]], + ['kwc',['kWc',['../structcutlass_1_1ShapeCount.html#aac5c49469aa80d119c2006291b431276',1,'cutlass::ShapeCount']]], + ['kwmmamatrix',['kWmmaMatrix',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80a21d2b2793bab0d348df40715b8f14419',1,'cutlass::IteratorFragment']]], + ['kwordcount',['kWordCount',['../structcutlass_1_1PredicateVector.html#a734bbfaf3829f73ef0b44fa7db4ccd42',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/all_b.html b/docs/generated-html/search/all_b.html new file mode 100644 index 0000000000..f2a3c8d0e3 --- /dev/null +++ b/docs/generated-html/search/all_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_b.js b/docs/generated-html/search/all_b.js new file mode 100644 index 0000000000..d156678a63 --- /dev/null +++ b/docs/generated-html/search/all_b.js @@ -0,0 +1,27 @@ +var searchData= +[ + ['launch',['launch',['../structcutlass_1_1gemm_1_1Gemm.html#a77ae137aec79b4061a9ffa09aabf641c',1,'cutlass::gemm::Gemm::launch(Params const &params, cudaStream_t stream=cudaStreamDefault)'],['../structcutlass_1_1gemm_1_1Gemm.html#a4f4122a2ae8b9b09a9660e5c2ca9e906',1,'cutlass::gemm::Gemm::launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)']]], + ['layout_20concept',['Layout Concept',['../group__layout__concept.html',1,'']]], + ['lcm',['lcm',['../namespacecutlass.html#af07506fee11de882d926f4e8237eef09',1,'cutlass']]], + ['lda',['lda',['../structcutlass_1_1gemm_1_1GemmDesc.html#a62ad30ba419ccb661e6700da98221789',1,'cutlass::gemm::GemmDesc']]], + ['ldb',['ldb',['../structcutlass_1_1gemm_1_1GemmDesc.html#a7591ce0223b0d05c4d6fca6c67b98bfe',1,'cutlass::gemm::GemmDesc']]], + ['ldc',['ldc',['../structcutlass_1_1gemm_1_1GemmDesc.html#a0f492560cabc45cd492da65b819d09db',1,'cutlass::gemm::GemmDesc']]], + ['ldd',['ldd',['../structcutlass_1_1gemm_1_1GemmDesc.html#a3280e5c5484f5c10d1412bcb70eb77e9',1,'cutlass::gemm::GemmDesc']]], + ['leading_5fdim',['leading_dim',['../classcutlass_1_1TensorRef.html#a8e1c61910ffb49ec64930f66dd342b77',1,'cutlass::TensorRef']]], + ['less',['less',['../structcutlass_1_1platform_1_1less.html',1,'cutlass::platform']]], + ['linear_5fscaling_2eh',['linear_scaling.h',['../linear__scaling_8h.html',1,'']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a34df6970f033b3090ad8f4d40063b1b2',1,'cutlass::gemm::LinearScaling::LinearScaling()']]], + ['load',['Load',['../structcutlass_1_1Load.html',1,'cutlass::Load< Scalar_, Lanes_, Memory_, bool, 
size_t >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#aea5ed35a44624684ffa9ada9d09a8893',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a01a847858cb330d7d109ddee228e96ce',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a014682b143bce65667075ea15fad184d',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1Load.html#ad033ebc1452d96b18913333bf7068140',1,'cutlass::Load::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#aa9d5e227ea20ad3c6952f296016ec167',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a0e58d26dd68aabb6cb9678f5656c7e6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a7ba77016bee8e941f7831cc9fbfa994d',1,'cutlass::Load< double, 2, Memory_, true, 16 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a4ee00178c441bdf4d4a1f8cf984bc03f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load()'],['../structcutlass_1_1TileLoadIterator.html#a9c4b332857f419e6f789a93404dc2140',1,'cutlass::TileLoadIterator::load(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileLoadIterator.html#a1058cdec33393db9c16b28c21d8957db',1,'cutlass::TileLoadIterator::load(Fragment &fragment) const']]], + 
['load_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['load_5fiterator',['load_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a42ffcba6af2b5ddfb1f4825a34d43532',1,'cutlass::gemm::GlobalLoadStreamBase::Params::load_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a3be938f8661f9cd10966866b7b80b471',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::load_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#ad2381f2311ee8400a2dc57c19084ef5e',1,'cutlass::gemm::GlobalLoadStreamBase::load_iterator()']]], + ['load_5fpost_5fincrement',['load_post_increment',['../structcutlass_1_1TileLoadIterator.html#a2716b9010d2902b90e63abb0531ee915',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileLoadIterator.html#a195993d58ae0eeb53203116ac02ab38d',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment)']]], + ['load_5fstore_2eh',['load_store.h',['../load__store_8h.html',1,'']]], + 
['loaditerator',['LoadIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#acff2a1ab180eec672714cd587a28f9fe',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['log2_5fdown',['log2_down',['../structcutlass_1_1log2__down.html',1,'cutlass']]], + ['log2_5fdown_3c_20n_2c_201_2c_20count_20_3e',['log2_down< N, 1, Count >',['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]], + ['log2_5fup',['log2_up',['../structcutlass_1_1log2__up.html',1,'cutlass']]], + ['log2_5fup_3c_20n_2c_201_2c_20count_20_3e',['log2_up< N, 1, Count >',['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/all_c.html b/docs/generated-html/search/all_c.html new file mode 100644 index 0000000000..637681075b --- /dev/null +++ b/docs/generated-html/search/all_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_c.js b/docs/generated-html/search/all_c.js new file mode 100644 index 0000000000..7420bb3e66 --- /dev/null +++ b/docs/generated-html/search/all_c.js @@ -0,0 +1,20 @@ +var searchData= +[ + ['m',['m',['../structcutlass_1_1gemm_1_1GemmDesc.html#a5c2b3e75cb6873762ba3f85487b78579',1,'cutlass::gemm::GemmDesc::m()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac344bf5ca318dc343bd6fa6bf52d2e22',1,'cutlass::gemm::GemmEpilogue::m()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aaf27c0f2f4ab730ed5c865e9f7d2373b',1,'cutlass::gemm::GemmTraits::Params::m()']]], + ['main_5floop',['main_loop',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#aa5dd7edc3cffa785eb1e5b62c18c74c4',1,'cutlass::gemm::GemmTraits::SharedStorage']]], + ['mainloopsharedstorage',['MainLoopSharedStorage',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['make_5fcoord',['make_Coord',['../namespacecutlass.html#a7419519fa453a121dfa5f26bf87318d9',1,'cutlass::make_Coord(int _0)'],['../namespacecutlass.html#a61d81e5363bcb8a7f6dd70f053242564',1,'cutlass::make_Coord(int _0, int _1)'],['../namespacecutlass.html#a25acf680a7d2592c957a7ac603f4c361',1,'cutlass::make_Coord(int _0, int _1, int _2)'],['../namespacecutlass.html#a9410b1f5956d3aaf4584e65d047428fc',1,'cutlass::make_Coord(int _0, int _1, int _2, int _3)']]], + ['make_5fpair',['make_pair',['../namespacecutlass_1_1platform.html#a90ce74c7faa4e27c888ce56e957b73d5',1,'cutlass::platform']]], + ['make_5fzero',['make_zero',['../namespacecutlass.html#acdb62db582cf90cfd437fc56f4ca7bbf',1,'cutlass::make_zero(Scalar_ &x)'],['../namespacecutlass.html#abc5c00b4986db5a114e774cee9999717',1,'cutlass::make_zero(Vector< Scalar_, kLanes_ > &vec)']]], + ['matrix_5ftraits_2eh',['matrix_traits.h',['../matrix__traits_8h.html',1,'']]], + ['matrixlayout',['MatrixLayout',['../structcutlass_1_1MatrixLayout.html',1,'cutlass']]], + 
['max',['max',['../namespacecutlass_1_1platform.html#af6a9a165e53d7e85ae121d5789aa03e0',1,'cutlass::platform']]], + ['memoryspace',['MemorySpace',['../structcutlass_1_1MemorySpace.html',1,'cutlass']]], + ['min',['min',['../namespacecutlass_1_1platform.html#a57c071d2a7305dd4ec60542e66b0c81c',1,'cutlass::platform']]], + ['multiplicandtraits',['MultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a21a3524edaf002b5e5878df3c7eae7e7',1,'cutlass::gemm::GemmGlobalTileTraits']]], + ['multiplicative',['Multiplicative',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375af0cc1d8a713958a86af1063595604597',1,'cutlass::Identity']]], + ['multiply',['multiply',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a522301fbe3e276cb5ef9fbe75bb2ab50',1,'cutlass::gemm::FragmentMultiplyAdd::multiply()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ae62d61ec068ac958753d0a2f5a99d8e2',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply()']]], + ['multiply_5fadd',['multiply_add',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a34bbf209967fef6181d3d46dd27fa0c0',1,'cutlass::gemm::FragmentMultiplyAdd::multiply_add()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a89c8b663af69f13c2a02cb464b5172a5',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply_add()'],['../structcutlass_1_1gemm_1_1Gemm.html#a2e844037d2527b842de3590cb783a49f',1,'cutlass::gemm::Gemm::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a66486d38349fa20eb065ae9542eb43aa',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad22dd143c304c22c2630aedbfd3459af',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, 
ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5dcf66c8126ec8adf8e66d4bf5b2f347',1,'cutlass::gemm::ThreadMultiplyAdd::multiply_add()']]], + ['multiplyadd',['MultiplyAdd',['../structcutlass_1_1gemm_1_1GemmConfig.html#a8669096ddbb8c810fb8d2313d62e6ee7',1,'cutlass::gemm::GemmConfig::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#af810544e956b04830c5be7ce41d3b45c',1,'cutlass::gemm::GemmTraits::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ae9facf63912d98e597883bf7efb56cc8',1,'cutlass::gemm::HgemmTraitsHelper::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a87e34d56fa955670331749724bee9fd8',1,'cutlass::gemm::IgemmTraitsHelper::MultiplyAdd()']]], + ['multiplyaddscalar',['MultiplyAddScalar',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a19fb8c9b9a77aebec507635de7da6f21',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afac6f7a62b24396ea6861e6fd10779cc',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a42dd312d4cf5bb53b472389897f9deeb',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aad14588b1515e37ede24915f589d32ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()']]] +]; diff --git a/docs/generated-html/search/all_d.html b/docs/generated-html/search/all_d.html new file mode 100644 index 0000000000..cc52c79fb2 --- /dev/null +++ 
b/docs/generated-html/search/all_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_d.js b/docs/generated-html/search/all_d.js new file mode 100644 index 0000000000..7c84844e4c --- /dev/null +++ b/docs/generated-html/search/all_d.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['n',['N',['../structcutlass_1_1Coord.html#a3f2f5a9d7ef2063456c4d9f7e57e71ca',1,'cutlass::Coord::N()'],['../structcutlass_1_1gemm_1_1GemmDesc.html#acee9727aa6cb612a25cd6ced4829061a',1,'cutlass::gemm::GemmDesc::n()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9cc371cd2f1a9485583afdacbb7403ea',1,'cutlass::gemm::GemmEpilogue::n()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a437d4b6f1f149849c5ae635a5993e7ac',1,'cutlass::gemm::GemmTraits::Params::n()']]], + ['no',['no',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae096aa6c67f60d8d9c5a4b084118a8af',1,'cutlass::platform::is_base_of_helper']]], + ['noexcept',['noexcept',['../platform_8h.html#a189faadd7f99f6c354db09acbb2aafcd',1,'platform.h']]], + ['nullptr',['nullptr',['../platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936',1,'platform.h']]], + ['nullptr_5ft',['nullptr_t',['../structcutlass_1_1platform_1_1nullptr__t.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/all_e.html b/docs/generated-html/search/all_e.html new file mode 100644 index 0000000000..85b39bd484 --- /dev/null +++ b/docs/generated-html/search/all_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_e.js b/docs/generated-html/search/all_e.js new file mode 100644 index 0000000000..49c17a921c --- /dev/null +++ b/docs/generated-html/search/all_e.js @@ -0,0 +1,34 @@ +var searchData= +[ + ['offset',['offset',['../classcutlass_1_1TensorRef.html#a02ee5d16ed4ce4705a99bb16b2ae1ae8',1,'cutlass::TensorRef::offset()'],['../classcutlass_1_1TensorView.html#a064f3630e69798e7915f910c4ee99ab7',1,'cutlass::TensorView::offset()']]], + ['offset_5ft',['Offset_t',['../classcutlass_1_1TensorView.html#a215946fb080a5253815feb1f639c8f6f',1,'cutlass::TensorView']]], + ['operator_20_26_3d',['operator &=',['../structcutlass_1_1PredicateVector.html#a3dd9aeba8f3cbe7a8198d68d91a0bbb9',1,'cutlass::PredicateVector']]], + ['operator_20b_2a',['operator B*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8d100273203db9018dffbbe84e0b6c76',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20bool',['operator bool',['../classcutlass_1_1platform_1_1unique__ptr.html#a5791650488ae864f10ad04bec4a31005',1,'cutlass::platform::unique_ptr']]], + ['operator_20d_2a',['operator D*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8aadc500baf1492b1a4d05cc8b35fc13',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20value_5ftype',['operator value_type',['../structcutlass_1_1platform_1_1integral__constant.html#a55d25116387f1c6d978462b1d245d675',1,'cutlass::platform::integral_constant']]], + 
['operator_21_3d',['operator!=',['../structcutlass_1_1Coord.html#a7fb46873e8f3cf38212703d35bd36995',1,'cutlass::Coord::operator!=()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a3d06715a77740034697686a7977cb685',1,'cutlass::PredicateVector::ConstIterator::operator!=()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a08cb4d1395b88a4451fbb1a27e010887',1,'cutlass::PredicateVector::Iterator::operator!=()'],['../namespacecutlass_1_1platform.html#a248f49adf09654d2cd04bd2760ab2566',1,'cutlass::platform::operator!=()']]], + ['operator_28_29',['operator()',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html#ab8adb983c0573a0015469f40a75287be',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html#abaf0d4459a64b3e9533758b59600bd52',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html#a1e357fe5bc1daef333e6be776a21a2ca',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html#a4e35f0b2ca63a6b981230b73f843f726',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html#a51a325b435b9a53effaa003b3670e410',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html#a5b4a635a521364357386259b0f84c0ba',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html#a4f9cca16303ac9ae29a0eaa11dcc23b6',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html#ace1b936cab289c6884e673312283d422
',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html#a9fc1ca09733113f80fe5fe45db3d9b81',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html#a1228edf6cc0f81af520dc77c8792b94c',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html#ad7537f8b30ee6913cf4afa1d3c054e68',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1TiledThreadOffset.html#a7290b6ca9ef0bede634f69bd05450fa2',1,'cutlass::TiledThreadOffset::operator()()'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html#a0e4edffb19218ccbf77995f6d20df000',1,'cutlass::TileTraitsWarpRake::ThreadOffset::operator()()'],['../structcutlass_1_1platform_1_1plus.html#a3bf1e5147df4287bf58ad8f11ea0d98c',1,'cutlass::platform::plus::operator()()'],['../structcutlass_1_1platform_1_1less.html#adfb49ee70a700a8483c70b4b353f6bc5',1,'cutlass::platform::less::operator()()'],['../structcutlass_1_1platform_1_1greater.html#a8d56cf343dd33acebe19d0b51abe3978',1,'cutlass::platform::greater::operator()()'],['../structcutlass_1_1platform_1_1integral__constant.html#a5271a533526a535ae8b783c736252f18',1,'cutlass::platform::integral_constant::operator()()'],['../structcutlass_1_1platform_1_1default__delete.html#a59e6e3cc95685ac34fa6f9cf301b3a15',1,'cutlass::platform::default_delete::operator()()'],['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html#a16c5595a5aec7d7ee34e38bef4a66c87',1,'cutlass::platform::default_delete< T[]>::operator()()']]], + 
['operator_2a',['operator*',['../structcutlass_1_1Coord.html#a8e4f7df55a75d040cf50cf9984c04c8a',1,'cutlass::Coord::operator*()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#abbc2bceb6cf8d7f168b8a00eb48c0946',1,'cutlass::PredicateVector::ConstIterator::operator*()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a049b568e0f5de011ee76ce79bcedbab4',1,'cutlass::PredicateVector::Iterator::operator*()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a78016158f99dd87e822a2a2cbd4cec78',1,'cutlass::PredicateVector::TrivialIterator::operator*()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a45a3cb6d8641a6130991d56e84cbb38b',1,'cutlass::platform::unique_ptr::operator*()']]], + ['operator_2a_3d',['operator*=',['../structcutlass_1_1Coord.html#a282b6cc9ac8b2f72720c252791155aad',1,'cutlass::Coord']]], + ['operator_2b',['operator+',['../structcutlass_1_1Coord.html#a3dfc4ce4191097b6c3268696f2a45ef5',1,'cutlass::Coord::operator+()'],['../classcutlass_1_1TensorRef.html#aa7b80d225c01c9dc12aafc515cf15842',1,'cutlass::TensorRef::operator+()']]], + 
['operator_2b_2b',['operator++',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a10ee4bb2f206432aa5ee1a83cb046b70',1,'cutlass::PredicateVector::ConstIterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a977a99af3166a58d5bc5a613a1abe7d5',1,'cutlass::PredicateVector::ConstIterator::operator++(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a7dddc0a6b5c958156beef29bedfd1bd3',1,'cutlass::PredicateVector::Iterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a6c7333ad14d545cafc707e78752bf1e3',1,'cutlass::PredicateVector::Iterator::operator++(int)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ad24e9b451064e99fb19955f772c30e6a',1,'cutlass::PredicateVector::TrivialIterator::operator++()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#aa35b9165920b83b9a5a888df83925051',1,'cutlass::PredicateVector::TrivialIterator::operator++(int)']]], + ['operator_2b_3d',['operator+=',['../structcutlass_1_1Coord.html#aeb209486943fa9d42911325b16e49e09',1,'cutlass::Coord']]], + ['operator_2d',['operator-',['../structcutlass_1_1Coord.html#acc510511ffb52bed7f6a52f14b99750d',1,'cutlass::Coord::operator-()'],['../classcutlass_1_1TensorRef.html#a3843ccfd1d097f25eff45dc159709938',1,'cutlass::TensorRef::operator-()']]], + ['operator_2d_2d',['operator--',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2763012a9284e97650b14e20c5668286',1,'cutlass::PredicateVector::ConstIterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2910a714d34a688b8ea560ea2933436b',1,'cutlass::PredicateVector::ConstIterator::operator--(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a69fb5b24eeb43331b7401768e8584e61',1,'cutlass::PredicateVector::Iterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#aad709a11f43b84c88e3ce3a0394f8e8a',1,'cutlass::PredicateVector::Iterator::operator--(int)']]], + 
['operator_2d_3d',['operator-=',['../structcutlass_1_1Coord.html#ac1795ec2a5890d8a39840567a4bea88e',1,'cutlass::Coord']]], + ['operator_2d_3e',['operator->',['../classcutlass_1_1platform_1_1unique__ptr.html#afa52edcaef23461ce1f9c1dac349c24b',1,'cutlass::platform::unique_ptr']]], + ['operator_2f',['operator/',['../structcutlass_1_1Coord.html#a87f485be079fa68bcf576da4d56f0ece',1,'cutlass::Coord']]], + ['operator_2f_3d',['operator/=',['../structcutlass_1_1Coord.html#abe91e59962ef0d73aec9c14824f64ecc',1,'cutlass::Coord']]], + ['operator_3c',['operator<',['../namespacecutlass_1_1platform.html#a412dbdbc678ecd12b55fcad4ef4155bd',1,'cutlass::platform']]], + ['operator_3c_3c',['operator<<',['../core__io_8h.html#a4a0d84a2a19a11549b87a2328d58690d',1,'core_io.h']]], + ['operator_3c_3d',['operator<=',['../namespacecutlass_1_1platform.html#a41d573133357bd555f78d33afc1152d3',1,'cutlass::platform']]], + ['operator_3d',['operator=',['../classcutlass_1_1TensorView.html#aa9e9e19f35ce3111f64b763ca49b51ef',1,'cutlass::TensorView']]], + ['operator_3d_3d',['operator==',['../structcutlass_1_1Coord.html#acfa94aabd0c9a71ee994ca479d5f515f',1,'cutlass::Coord::operator==()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#aa2d03d88ac23051803d010f78157c357',1,'cutlass::PredicateVector::ConstIterator::operator==()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a5c5266fcef67c7b263682c4bc4a5000e',1,'cutlass::PredicateVector::Iterator::operator==()'],['../namespacecutlass_1_1platform.html#ab9b8306ae9dc21fa646c49b68fa8e197',1,'cutlass::platform::operator==()']]], + ['operator_3e',['operator>',['../namespacecutlass_1_1platform.html#a9e8e698d40b8df881991fde9ba2a1b12',1,'cutlass::platform']]], + ['operator_3e_3d',['operator>=',['../namespacecutlass_1_1platform.html#ab0f21e67c0a4b5c6952042b502c6816f',1,'cutlass::platform']]], + ['operator_5b_5d',['operator[]',['../structcutlass_1_1Coord.html#ab7fc89de3ccd7096ab275fb5dd40104c',1,'cutlass::Coord::operator[](int 
dim)'],['../structcutlass_1_1Coord.html#a6eeab0a1686ee25389e1bd017c5f03ae',1,'cutlass::Coord::operator[](int dim) const'],['../structcutlass_1_1Fragment.html#a99fef5f3093b2df50905ab13819b67a0',1,'cutlass::Fragment::operator[](int i)'],['../structcutlass_1_1Fragment.html#a75f51bb6ca84615076aab42ac9d42592',1,'cutlass::Fragment::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a83bb6a3ed588e2d890bf986665d2b7bb',1,'cutlass::FragmentIterator::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a3bd2a9d8467f8db02ca3a01ae0c11ad7',1,'cutlass::FragmentIterator::operator[](int i)'],['../structcutlass_1_1FragmentConstIterator.html#af16f2aa14ff424b038a393b683c4783e',1,'cutlass::FragmentConstIterator::operator[]()'],['../structcutlass_1_1PredicateVector.html#a840985438ac8306ec680eb20edd4e5c5',1,'cutlass::PredicateVector::operator[]()'],['../classcutlass_1_1TensorRef.html#a6a2aa88ed77557c089a165da0df1e974',1,'cutlass::TensorRef::operator[](Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a34e97ab2190b4681d1c1199186d66f1c',1,'cutlass::TensorRef::operator[](int idx) const'],['../classcutlass_1_1TensorView.html#a7fe7e44e15fd1ac58fb55edf72e8fb23',1,'cutlass::TensorView::operator[]()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a5c7a204af07a7d325b0a8303e199a50d',1,'cutlass::platform::unique_ptr::operator[]()'],['../unioncutlass_1_1Vector.html#a250860c921c94a6077344f9e11bf5b02',1,'cutlass::Vector::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector.html#a44cc27bf8a7b789b4ae8538155a50156',1,'cutlass::Vector::operator[](uint32_t i)'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab0516cef8949f5998b5251cc6b6db683',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a8ade80e040264fbd669d3f15c249884e',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i)']]], + 
['operator_7c_3d',['operator|=',['../structcutlass_1_1PredicateVector.html#aab9de134132c62de1c062ca57582cdbc',1,'cutlass::PredicateVector']]], + ['outputfragment',['OutputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a8ef69ab595489e142911e8e240fb405a',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment()'],['../structcutlass_1_1Copy.html#a545be6c284d625b0841a10cc9126e14a',1,'cutlass::Copy::OutputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a9c04f0b0eb0293325f661b72168d4fa8',1,'cutlass::gemm::HgemmSwizzle::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a3d89bfc0d94cd695cbe4a61859e5e553',1,'cutlass::gemm::IgemmFloatToInt8Converter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a66ac385a1cd771b95f70ee36cd74e8f7',1,'cutlass::gemm::IgemmInt8ToFloatConverter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac0a4e31e95f8e0c77ae087284bb02ff8',1,'cutlass::gemm::IgemmSwizzle::OutputFragment()']]], + 
['outputtile',['OutputTile',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a92a135fac401d43a8d2f14982d90274b',1,'cutlass::gemm::GemmEpilogue::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aed1bd9df5ff579ba3e36ae5ba781c075',1,'cutlass::gemm::GemmEpilogueTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ac30a062bed1a65e45961c4f301b69101',1,'cutlass::gemm::GemmEpilogueTraitsHelper::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ad52b81080731ee1f0d3c2c7eaba6f60d',1,'cutlass::gemm::GemmSharedStoreTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#acb16feebdcad5bbebe9d4d3383c37899',1,'cutlass::gemm::GemmSharedLoadTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a53450f4d7444d6a4c0d2353496c0a4fd',1,'cutlass::gemm::GemmConfig::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a97d7ee63e5d180410b370f095648f367',1,'cutlass::gemm::GemmTraits::OutputTile()']]] +]; diff --git a/docs/generated-html/search/all_f.html b/docs/generated-html/search/all_f.html new file mode 100644 index 0000000000..89fa15a652 --- /dev/null +++ b/docs/generated-html/search/all_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/all_f.js b/docs/generated-html/search/all_f.js new file mode 100644 index 0000000000..05e5e92217 --- /dev/null +++ b/docs/generated-html/search/all_f.js @@ -0,0 +1,26 @@ +var searchData= +[ + ['pad',['pad',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html',1,'cutlass::platform::alignment_of']]], + ['params',['Params',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params'],['../structcutlass_1_1TileIteratorBase_1_1Params.html',1,'cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1TileLoadIterator_1_1Params.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1TileStoreIterator_1_1Params.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::Params'],['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html',1,'cutlass::gemm::Gemm< GemmTraits_ >::Params'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae5209fa80705442693833c63d535161e',1,'cutlass::gemm::GemmEpilogue::Params()'],['../structcutlass_1_1gemm_1_1Gemm.html#a3c292637ab0ec8e73856d0cf6efb6da2',1,'cutlass::gemm::Gemm::params()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a81b028a18df51d3caa1b0ba0c990e362',1,'cutlass::gemm::GemmEpilogue::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab8c79cb1a8157dd00429c93cb4a41322',1,'cutlass::gemm::GemmGlobalIteratorAb::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ac368b1ea1c5ad2209a6ac6bec597600f',1,'cutlass::gemm::GemmGlobalIteratorCd::params()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a0ad4218ad2c10641379b236473e79e84',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::params()'],['../structcutlass_1_1TileLoadIterator.html#aaafe35622751532971c1b7efc54c888b',1,'cutlass::TileLoadIterator::params()'],['../structcutlass_1_1TileStoreIterator.html#a5e6c00b99e0f752137b07f7059f6ee0f',1,'cutlass::TileStoreIterator::params()']]], + ['platform_2eh',['platform.h',['../platform_8h.html',1,'']]], + ['plus',['plus',['../structcutlass_1_1platform_1_1plus.html',1,'cutlass::platform']]], + 
['pointer',['pointer',['../classcutlass_1_1platform_1_1unique__ptr.html#ab6ce60d03d11b269c1e151dfa7c696f9',1,'cutlass::platform::unique_ptr::pointer()'],['../structcutlass_1_1FragmentIterator.html#af667793926cdb24d701eb75e0345bbd6',1,'cutlass::FragmentIterator::pointer()'],['../structcutlass_1_1FragmentConstIterator.html#aee37f8ea06127b94a304bb776945509b',1,'cutlass::FragmentConstIterator::pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#ad764f98e770d4685006e6888214dcd4d',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa42c4e7419308926b925909e6a5c719d',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#a6608f7027994aaebdefd004fe94153d9',1,'cutlass::TileLoadIterator::Params::pointer()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a6bbadae6b13aef8f31a77cacd88b068b',1,'cutlass::TileStoreIterator::Params::pointer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#adcbf24c1b7f45ab5fe8f3ad94154b4d1',1,'cutlass::gemm::GlobalLoadStreamBase::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a3ff6f630b6b317ace1cf6e13fdf3a0cd',1,'cutlass::gemm::GemmGlobalTileTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a3abcfa68ae9904a13195d32d6e6c4bc6',1,'cutlass::gemm::GemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a5be0c995c57faafaad7ae55ae015fc00',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ab883c2a8b90262152faca9cabe515dc4',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#adc4946dfbe914140c6852d0c05b30864',1,'cutlass::gemm::GemmSharedLoadTileATraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afafb3d9ae47
0c8ef56ec4ca5e66e2182',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a20471c2f569c28538dad8a220ab25624',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1e72b69cf2147e4d194893a64417b920',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a84a73da2a07210fcfad10853b941c85e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1TileLoadIterator.html#a5a179e148ccd770e1703f288624fa9b8',1,'cutlass::TileLoadIterator::Pointer()']]], + ['predicate_5finc_5fadvance',['predicate_inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a2b5d2b02d241e89677c41eb658ace129',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa0367d016549cce6bd896bae364fc248',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_advance()']]], + ['predicate_5finc_5fh',['predicate_inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a36afe18f94aacd0746c8946866371d3c',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5b8177a936ba30a3d68ca238aaf76ff6',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_h()']]], + ['predicate_20iterator_20concept',['Predicate Iterator Concept',['../group__predicate__iterator__concept.html',1,'']]], + ['predicate_5foffset',['predicate_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a3e8f6cf08d23318f3e3263b55cf3b84a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a38f13119cf3111e84914f1bef6f5d985',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_offset()']]], + 
['predicate_20tile_20adapter_20concept',['Predicate Tile Adapter Concept',['../group__predicate__tile__adapter.html',1,'']]], + ['predicate_5fvector_2eh',['predicate_vector.h',['../predicate__vector_8h.html',1,'']]], + ['predicate_20vector_20concept',['Predicate Vector Concept',['../group__predicate__vector__concept.html',1,'']]], + ['predicates',['predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#af323c9db74f0de3376edd35eb377bc9c',1,'cutlass::gemm::GemmGlobalIteratorAb::predicates()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad23e6224e37ec1d13dc237ce8ec6e977',1,'cutlass::gemm::GemmGlobalIteratorCd::predicates()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af3c9d62554b1d311d82ba89e09cdd3fa',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::predicates()']]], + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html',1,'cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >'],['../structcutlass_1_1PredicateTileAdapter.html#a4c9eb6c6498ccf117427a3b35f7ce5ea',1,'cutlass::PredicateTileAdapter::PredicateTileAdapter()']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html',1,'cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ 
>'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3dd74f6e12339a87c0eb8f75fbdc7b9c',1,'cutlass::gemm::GemmGlobalIteratorAb::PredicateVector()'],['../structcutlass_1_1PredicateTileAdapter.html#a72669300eb0bd18ea8124f780862a0e4',1,'cutlass::PredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#ab9143288811a1262f7007f1b76b32e8f',1,'cutlass::ConstPredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1TileIteratorBase.html#a7ab46a9210b421d32af4d1394892cfd5',1,'cutlass::TileIteratorBase::PredicateVector()'],['../structcutlass_1_1TileLoadIterator.html#a64ae02b44f275ef2f016949aec769328',1,'cutlass::TileLoadIterator::PredicateVector()'],['../structcutlass_1_1TileStoreIterator.html#a5aa507eaeb63951f8e69fb223ec41809',1,'cutlass::TileStoreIterator::PredicateVector()'],['../structcutlass_1_1PredicateVector.html#aec1201df19c0ed0516810a3f19353c21',1,'cutlass::PredicateVector::PredicateVector()']]], + ['predicatevector_3c_20base_3a_3aiterations_3a_3akw_20_3e',['PredicateVector< Base::Iterations::kW >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20shapecount_3c_20typename_20base_3a_3aiterations_20_3e_3a_3akcount_20_3e',['PredicateVector< ShapeCount< typename Base::Iterations >::kCount >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['project',['project',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html#ae91b2350374f1734a30cbed45e14b8e3',1,'cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html#a0f1579013f56fe16ebc147271f163c3c',1,'cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html#af2a323461334a6b55b95074a1973d250',1,'cutlass::gemm::ProjectOperand< GemmOperand::kC, true 
>::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html#ace04040ccb13af5f9a283ca80ffe93d1',1,'cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project()']]], + ['projectoperand',['ProjectOperand',['../structcutlass_1_1gemm_1_1ProjectOperand.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3aka_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kA, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akb_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kB, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akc_2c_20true_20_3e',['ProjectOperand< GemmOperand::kC, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akd_2c_20true_20_3e',['ProjectOperand< GemmOperand::kD, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_0.html b/docs/generated-html/search/classes_0.html new file mode 100644 index 0000000000..e935fdf727 --- /dev/null +++ b/docs/generated-html/search/classes_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_0.js b/docs/generated-html/search/classes_0.js new file mode 100644 index 0000000000..bf7a2109db --- /dev/null +++ b/docs/generated-html/search/classes_0.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['aligned_5fchunk',['aligned_chunk',['../structcutlass_1_1platform_1_1aligned__chunk.html',1,'cutlass::platform']]], + ['aligned_5fstorage',['aligned_storage',['../structcutlass_1_1platform_1_1aligned__storage.html',1,'cutlass::platform']]], + ['alignedstruct',['AlignedStruct',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignedstruct_3c_20kvectorsize_20_3e',['AlignedStruct< kVectorSize >',['../structcutlass_1_1AlignedStruct.html',1,'cutlass']]], + ['alignment_5fof',['alignment_of',['../structcutlass_1_1platform_1_1alignment__of.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20value_5ft_20_3e',['alignment_of< const value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20const_20volatile_20value_5ft_20_3e',['alignment_of< const volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double2_20_3e',['alignment_of< double2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20double4_20_3e',['alignment_of< double4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20float4_20_3e',['alignment_of< float4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20int4_20_3e',['alignment_of< int4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20long4_20_3e',['alignment_of< long4 
>',['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong2_20_3e',['alignment_of< longlong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20longlong4_20_3e',['alignment_of< longlong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20uint4_20_3e',['alignment_of< uint4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulong4_20_3e',['alignment_of< ulong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong2_20_3e',['alignment_of< ulonglong2 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20ulonglong4_20_3e',['alignment_of< ulonglong4 >',['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html',1,'cutlass::platform']]], + ['alignment_5fof_3c_20volatile_20value_5ft_20_3e',['alignment_of< volatile value_t >',['../structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_1.html b/docs/generated-html/search/classes_1.html new file mode 100644 index 0000000000..3df6e80acf --- /dev/null +++ b/docs/generated-html/search/classes_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_1.js b/docs/generated-html/search/classes_1.js new file mode 100644 index 0000000000..1f2829239b --- /dev/null +++ b/docs/generated-html/search/classes_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['bool_5fconstant',['bool_constant',['../structcutlass_1_1platform_1_1bool__constant.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_10.html b/docs/generated-html/search/classes_10.html new file mode 100644 index 0000000000..0477a26692 --- /dev/null +++ b/docs/generated-html/search/classes_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_10.js b/docs/generated-html/search/classes_10.js new file mode 100644 index 0000000000..348d5341b5 --- /dev/null +++ b/docs/generated-html/search/classes_10.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_11.html b/docs/generated-html/search/classes_11.html new file mode 100644 index 0000000000..6bbc0d1423 --- /dev/null +++ b/docs/generated-html/search/classes_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_11.js b/docs/generated-html/search/classes_11.js new file mode 100644 index 0000000000..f0fc149310 --- /dev/null +++ b/docs/generated-html/search/classes_11.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['vector',['Vector',['../unioncutlass_1_1Vector.html',1,'cutlass']]], + ['vector_3c_20half_2c_20klanes_5f_20_3e',['Vector< half, kLanes_ >',['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html',1,'cutlass']]], + ['vectorize',['Vectorize',['../structcutlass_1_1Vectorize.html',1,'cutlass']]], + ['vectorize_3c_20element_5f_2c_201_20_3e',['Vectorize< Element_, 1 >',['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html',1,'cutlass']]], + ['vectortraits',['VectorTraits',['../structcutlass_1_1VectorTraits.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['VectorTraits< Vector< T, Lanes > >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['vectortraits_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['VectorTraits< Vector< T, Lanes > const >',['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_12.html b/docs/generated-html/search/classes_12.html new file mode 100644 index 0000000000..c889f6d633 --- /dev/null +++ b/docs/generated-html/search/classes_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_12.js b/docs/generated-html/search/classes_12.js new file mode 100644 index 0000000000..dadc97812a --- /dev/null +++ b/docs/generated-html/search/classes_12.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html',1,'cutlass::gemm']]], + ['wmmagemmglobaliteratorcdtraits',['WmmaGemmGlobalIteratorCdTraits',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_2.html b/docs/generated-html/search/classes_2.html new file mode 100644 index 0000000000..028694ffaa --- /dev/null +++ b/docs/generated-html/search/classes_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_2.js b/docs/generated-html/search/classes_2.js new file mode 100644 index 0000000000..ee2cf16546 --- /dev/null +++ b/docs/generated-html/search/classes_2.js @@ -0,0 +1,23 @@ +var searchData= +[ + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html',1,'cutlass::gemm']]], + ['computeoffsetfromshape',['ComputeOffsetFromShape',['../structcutlass_1_1ComputeOffsetFromShape.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromshape_3c_20shape_3c_201_2c_20ksh_5f_2c_20ksw_5f_2c_20ksc_5f_20_3e_20_3e',['ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >',['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides',['ComputeOffsetFromStrides',['../structcutlass_1_1ComputeOffsetFromStrides.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html',1,'cutlass']]], + ['computeoffsetfromstrides_3c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html',1,'cutlass']]], + ['computethreadoffsetfromstrides',['ComputeThreadOffsetFromStrides',['../structcutlass_1_1ComputeThreadOffsetFromStrides.html',1,'cutlass']]], + 
['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_201_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_201_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html',1,'cutlass']]], + ['computethreadoffsetfromstrides_3c_20shape_3c_201_2c_20t_5fh_5f_2c_20t_5fw_5f_2c_20t_5fc_5f_20_3e_2c_20shape_3c_201_2c_20s_5fh_5f_2c_20s_5fw_5f_2c_20s_5fc_5f_20_3e_20_3e',['ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >',['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html',1,'cutlass']]], + ['conditional',['conditional',['../structcutlass_1_1platform_1_1conditional.html',1,'cutlass::platform']]], + ['conditional_3c_20false_2c_20t_2c_20f_20_3e',['conditional< false, T, F >',['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html',1,'cutlass::platform']]], + ['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html',1,'cutlass::PredicateVector']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html',1,'cutlass']]], + ['convert',['Convert',['../structcutlass_1_1Convert.html',1,'cutlass']]], + ['convert_3c_20fragment_3c_20inputscalar_5f_2c_20kscalars_5f_20_3e_2c_20fragment_3c_20outputscalar_5f_2c_20kscalars_5f_20_3e_20_3e',['Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html',1,'cutlass']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['coord_3c_204_20_3e',['Coord< 4 >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + 
['coord_3c_20rank_20_3e',['Coord< Rank >',['../structcutlass_1_1Coord.html',1,'cutlass']]], + ['copy',['Copy',['../structcutlass_1_1Copy.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_3.html b/docs/generated-html/search/classes_3.html new file mode 100644 index 0000000000..2b1abe383a --- /dev/null +++ b/docs/generated-html/search/classes_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_3.js b/docs/generated-html/search/classes_3.js new file mode 100644 index 0000000000..ec9fedbdda --- /dev/null +++ b/docs/generated-html/search/classes_3.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['default_5fdelete',['default_delete',['../structcutlass_1_1platform_1_1default__delete.html',1,'cutlass::platform']]], + ['default_5fdelete_3c_20t_5b_5d_3e',['default_delete< T[]>',['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html',1,'cutlass::platform']]], + ['dgemmconfig',['DgemmConfig',['../structcutlass_1_1gemm_1_1DgemmConfig.html',1,'cutlass::gemm']]], + ['dgemmtraits',['DgemmTraits',['../structcutlass_1_1gemm_1_1DgemmTraits.html',1,'cutlass::gemm']]], + ['divide_5fassert',['divide_assert',['../structcutlass_1_1divide__assert.html',1,'cutlass']]], + ['dummy',['dummy',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/classes_4.html b/docs/generated-html/search/classes_4.html new file mode 100644 index 0000000000..8735214942 --- /dev/null +++ b/docs/generated-html/search/classes_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_4.js b/docs/generated-html/search/classes_4.js new file mode 100644 index 0000000000..5fb3b19225 --- /dev/null +++ b/docs/generated-html/search/classes_4.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['enable_5fif',['enable_if',['../structcutlass_1_1platform_1_1enable__if.html',1,'cutlass::platform']]], + ['enable_5fif_3c_20false_2c_20t_20_3e',['enable_if< false, T >',['../structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html',1,'cutlass::platform']]], + ['extent',['Extent',['../structcutlass_1_1Extent.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20_3e',['Extent< Vector< T, Lanes > >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html',1,'cutlass']]], + ['extent_3c_20vector_3c_20t_2c_20lanes_20_3e_20const_20_3e',['Extent< Vector< T, Lanes > const >',['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_5.html b/docs/generated-html/search/classes_5.html new file mode 100644 index 0000000000..ba8b1c69bf --- /dev/null +++ b/docs/generated-html/search/classes_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_5.js b/docs/generated-html/search/classes_5.js new file mode 100644 index 0000000000..6202ed097f --- /dev/null +++ b/docs/generated-html/search/classes_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['fragment',['Fragment',['../structcutlass_1_1Fragment.html',1,'cutlass']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html',1,'cutlass']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html',1,'cutlass']]], + ['fragmentload',['FragmentLoad',['../structcutlass_1_1FragmentLoad.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html',1,'cutlass']]], + ['fragmentload_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html',1,'cutlass']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html',1,'cutlass::gemm']]], + ['fragmentmultiplyadd_3c_20half_20_3e',['FragmentMultiplyAdd< half >',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html',1,'cutlass::gemm']]], + ['fragmentstore',['FragmentStore',['../structcutlass_1_1FragmentStore.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akscalar_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< 
IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html',1,'cutlass']]], + ['fragmentstore_3c_20iteratorfragment_3a_3akwmmamatrix_2c_20kaccesssize_2c_20scalar_5f_2c_20memory_5f_2c_20fragmentelement_5f_2c_20kstride_20_3e',['FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_6.html b/docs/generated-html/search/classes_6.html new file mode 100644 index 0000000000..f5850938da --- /dev/null +++ b/docs/generated-html/search/classes_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_6.js b/docs/generated-html/search/classes_6.js new file mode 100644 index 0000000000..bc65aaca47 --- /dev/null +++ b/docs/generated-html/search/classes_6.js @@ -0,0 +1,49 @@ +var searchData= +[ + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html',1,'cutlass::gemm']]], + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20double_2c_20double_2c_20double_2c_20double_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20double_2c_20double_2c_20double_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_202_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_202_2c_201_2c_202_2c_201_2c_202_20_3e',['GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20float_2c_20float_2c_20float_2c_20float_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20float_2c_20float_2c_20float_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_204_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_204_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + 
['gemmconfig_3c_20half_2c_20half_2c_20half_2c_20half_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20half_2c_20half_2c_20half_20_3e_2c_20kscalarsperldga_5f_2c_20kscalarsperldga_5f_2c_208_2c_20kscalarsperldgb_5f_2c_20kscalarsperldgb_5f_2c_208_2c_202_2c_208_2c_202_2c_202_20_3e',['GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20int8_5ft_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_204_2c_202_20_3e',['GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmconfig_3c_20int8_5ft_2c_20int8_5ft_2c_20scalard_5f_2c_20scalard_5f_2c_20outputtile_5f_2c_20threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20shape_3c_201_2c_204_2c_208_20_3e_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e_2c_204_2c_204_2c_2016_2c_204_2c_204_2c_2016_2c_201_2c_204_2c_201_2c_202_20_3e',['GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >',['../structcutlass_1_1gemm_1_1GemmConfig.html',1,'cutlass::gemm']]], + ['gemmdesc',['GemmDesc',['../structcutlass_1_1gemm_1_1GemmDesc.html',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraits_3c_20gemmconfig_5f_3a_3aoutputtile_2c_20gemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['gemmepiloguetraits_3c_20igemmconfig_5f_3a_3aoutputtile_2c_20igemmconfig_5f_3a_3aaccumulators_2c_20helper_5f_3a_3agloballoaditeratorc_2c_20helper_5f_3a_3aglobaltransformerc_2c_20helper_5f_3a_3aglobaltransformerd_2c_20helper_5f_3a_3aglobalstoreiteratord_2c_20helper_5f_3a_3asharedstoreiteratord_2c_20helper_5f_3a_3asharedstoretransformerd_2c_20helper_5f_3a_3asharedloaditeratord_2c_20helper_5f_3a_3aiterations_2c_20helper_5f_3a_3adelta_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html',1,'cutlass::gemm']]], + 
['gemmepiloguetraitshelper',['GemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmepiloguetraitshelper_3c_20igemmconfig_5f_2c_20epiloguefunctor_5f_2c_20index_5f_20_3e',['GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html',1,'cutlass::gemm']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html',1,'cutlass::gemm']]], + ['gemmglobaltilecdtraits',['GemmGlobalTileCdTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits',['GemmGlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmglobaltiletraits_3c_20gemmoperand_3a_3akc_2c_20matrixlayout_3a_3akcolumnmajor_2c_20scalar_5f_2c_20tile_5f_2c_20threads_5f_2c_20kaccesssize_5f_20_3e',['GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html',1,'cutlass::gemm']]], + ['gemmmultiplicandtraits',['GemmMultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html',1,'cutlass::gemm']]], + ['gemmoperand',['GemmOperand',['../structcutlass_1_1GemmOperand.html',1,'cutlass']]], + ['gemmoperandtraitsab',['GemmOperandTraitsAb',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html',1,'cutlass::gemm']]], + ['gemmsharedloadtileatraits',['GemmSharedLoadTileATraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html',1,'cutlass::gemm']]], + ['gemmsharedloadtilebtraits',['GemmSharedLoadTileBTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html',1,'cutlass::gemm']]], + 
['gemmsharedloadtiledtraits',['GemmSharedLoadTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretileabtraits',['GemmSharedStoreTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstoretiledtraits',['GemmSharedStoreTileDTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html',1,'cutlass::gemm']]], + ['gemmsharedstorewithskewtileabtraits',['GemmSharedStoreWithSkewTileAbTraits',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20klayout_5f_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< kLayout_, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, 
GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['gemmtraits',['GemmTraits',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['gemmtraits_3c_20gemmconfig_5f_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20epilogue_5f_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20gemmconfig_5f_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3agloballoadstreamb_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreama_2c_20simplifiedgemmtraitshelper_3c_20gemmtiletraitshelpera_3c_20klayouta_5f_2c_20gemmconfig_5f_20_3e_2c_20gemmtiletraitshelperb_3c_20klayoutb_5f_2c_20gemmconfig_5f_20_3e_2c_20index_5f_20_3e_20_3a_3asharedloadstreamb_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20identityblockswizzle_2c_20index_5f_2c_20clearaccumulators_3c_20gemmconfig_5f_3a_3aaccumulators_3a_3aelement_20_3e_20_3e',['GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + 
['gemmtraits_3c_20helper_5f_3a_3agemmconfig_2c_20helper_5f_3a_3agloballoadstreama_2c_20helper_5f_3a_3agloballoadstreamb_2c_20helper_5f_3a_3asharedloadstreama_2c_20helper_5f_3a_3asharedloadstreamb_2c_20helper_5f_3a_3aepilogue_2c_20identityblockswizzle_2c_20index_5f_2c_20helper_5f_3a_3aclearaccumulators_20_3e',['GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >',['../structcutlass_1_1gemm_1_1GemmTraits.html',1,'cutlass::gemm']]], + ['getextent',['GetExtent',['../structcutlass_1_1gemm_1_1GetExtent.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3aka_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kA, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['getextent_3c_20gemmoperand_3a_3akb_2c_20tile_5f_20_3e',['GetExtent< GemmOperand::kB, Tile_ >',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html',1,'cutlass::gemm']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream'],['../structcutlass_1_1gemm_1_1GlobalLoadStream.html',1,'cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html',1,'cutlass::gemm']]], + ['greater',['greater',['../structcutlass_1_1platform_1_1greater.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_7.html b/docs/generated-html/search/classes_7.html new file mode 100644 index 0000000000..6418529c03 --- /dev/null +++ b/docs/generated-html/search/classes_7.html 
@@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_7.js b/docs/generated-html/search/classes_7.js new file mode 100644 index 0000000000..ab1c551ad7 --- /dev/null +++ b/docs/generated-html/search/classes_7.js @@ -0,0 +1,18 @@ +var searchData= +[ + ['hgemmconfig',['HgemmConfig',['../structcutlass_1_1gemm_1_1HgemmConfig.html',1,'cutlass::gemm']]], + ['hgemmcrosswiseglobaltiletraits',['HgemmCrosswiseGlobalTileTraits',['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html',1,'cutlass::gemm']]], + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera',['HgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelpera_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb',['HgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['hgemmtiletraitshelperb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['hgemmtraits',['HgemmTraits',['../structcutlass_1_1gemm_1_1HgemmTraits.html',1,'cutlass::gemm']]], + ['hgemmtraitshelper',['HgemmTraitsHelper',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['hgemmtransformera',['HgemmTransformerA',['../structcutlass_1_1gemm_1_1HgemmTransformerA.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ 
>',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb',['HgemmTransformerB',['../structcutlass_1_1gemm_1_1HgemmTransformerB.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['hgemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_8.html b/docs/generated-html/search/classes_8.html new file mode 100644 index 0000000000..87af6f6017 --- /dev/null +++ b/docs/generated-html/search/classes_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_8.js b/docs/generated-html/search/classes_8.js new file mode 100644 index 0000000000..b0bdbcd65e --- /dev/null +++ b/docs/generated-html/search/classes_8.js @@ -0,0 +1,77 @@ +var searchData= +[ + ['identity',['Identity',['../structcutlass_1_1Identity.html',1,'cutlass']]], + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html',1,'cutlass::gemm']]], + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmConfig.html',1,'cutlass::gemm']]], + ['igemmconfig_3c_20outputtile_5f_2c_20int8_5ft_2c_20accumulatorsperthread_5f_20_3e',['IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >',['../structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html',1,'cutlass::gemm']]], + ['igemmcontiguousglobaltiletraits',['IgemmContiguousGlobalTileTraits',['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html',1,'cutlass::gemm']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html',1,'cutlass::gemm']]], + ['igemmepilogue_3c_20gemmepiloguetraits_5f_2c_20true_20_3e',['IgemmEpilogue< GemmEpilogueTraits_, true >',['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar',['IgemmEpilogueScalar',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html',1,'cutlass::gemm']]], + ['igemmepiloguescalar_3c_20int_20_3e',['IgemmEpilogueScalar< int >',['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html',1,'cutlass::gemm']]], + ['igemmepiloguetraits',['IgemmEpilogueTraits',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['igemmepiloguetraitshelper',['IgemmEpilogueTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html',1,'cutlass::gemm']]], + 
['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html',1,'cutlass::gemm']]], + ['igemmgloballoadtransformer',['IgemmGlobalLoadTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html',1,'cutlass::gemm']]], + ['igemmgloballoadtransformer_3c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_2c_20float_20_3e',['IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >',['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer',['IgemmGlobalStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmglobalstoretransformer_3c_20float_2c_20fragment_3c_20int8_5ft_2c_20kelements_5f_20_3e_20_3e',['IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >',['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html',1,'cutlass::gemm']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html',1,'cutlass::gemm']]], + ['igemmsharedstoretransformer',['IgemmSharedStoreTransformer',['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html',1,'cutlass::gemm']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera',['IgemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelpera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + 
['igemmtiletraitshelperb',['IgemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.html',1,'cutlass::gemm']]], + ['igemmtiletraitshelperb_3c_20matrixlayout_3a_3akrowmajor_2c_20gemmconfig_5f_20_3e',['IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >',['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html',1,'cutlass::gemm']]], + ['igemmtraits',['IgemmTraits',['../structcutlass_1_1gemm_1_1IgemmTraits.html',1,'cutlass::gemm']]], + ['igemmtraitshelper',['IgemmTraitsHelper',['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html',1,'cutlass::gemm']]], + ['igemmtransformera',['IgemmTransformerA',['../structcutlass_1_1gemm_1_1IgemmTransformerA.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformera_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb',['IgemmTransformerB',['../structcutlass_1_1gemm_1_1IgemmTransformerB.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akcolumnmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + ['igemmtransformerb_3c_20matrixlayout_3a_3akrowmajor_2c_20iterator_5f_20_3e',['IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >',['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html',1,'cutlass::gemm']]], + 
['integral_5fconstant',['integral_constant',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_20v_20_3e',['integral_constant< bool, V >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5farithmetic_3c_20t_20_3e_3a_3avalue_7c_7cis_5fvoid_3c_20t_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20nullptr_5ft_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fbase_5fof_5fhelper_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_7c_7c_28is_5fsame_3c_20remove_5fcv_3c_20baset_20_3e_3a_3atype_2c_20remove_5fcv_3c_20derivedt_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_base_of_helper< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)||(is_same< remove_cv< BaseT >::type, remove_cv< DerivedT >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5ffundamental_3c_20t_20_3e_3a_3avalue_7c_7cis_5fpointer_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28is_5fintegral_3c_20t_20_3e_3a_3avalue_7c_7cis_5ffloating_5fpoint_3c_20t_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + 
['integral_5fconstant_3c_20bool_2c_28is_5fsame_3c_20float_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_7c_7cis_5fsame_3c_20double_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e_3a_3avalue_29_3e',['integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['integral_5fconstant_3c_20bool_2c_28n_20_26_28n_20_2d_201_29_29_3d_3d0_20_3e',['integral_constant< bool,(N &(N - 1))==0 >',['../structcutlass_1_1platform_1_1integral__constant.html',1,'cutlass::platform']]], + ['is_5farithmetic',['is_arithmetic',['../structcutlass_1_1platform_1_1is__arithmetic.html',1,'cutlass::platform']]], + ['is_5fbase_5fof',['is_base_of',['../structcutlass_1_1platform_1_1is__base__of.html',1,'cutlass::platform']]], + ['is_5fbase_5fof_5fhelper',['is_base_of_helper',['../structcutlass_1_1platform_1_1is__base__of__helper.html',1,'cutlass::platform']]], + ['is_5ffloating_5fpoint',['is_floating_point',['../structcutlass_1_1platform_1_1is__floating__point.html',1,'cutlass::platform']]], + ['is_5ffundamental',['is_fundamental',['../structcutlass_1_1platform_1_1is__fundamental.html',1,'cutlass::platform']]], + ['is_5fintegral',['is_integral',['../structcutlass_1_1platform_1_1is__integral.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20char_20_3e',['is_integral< char >',['../structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20t_20_3e',['is_integral< const T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20const_20volatile_20t_20_3e',['is_integral< const volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20int_20_3e',['is_integral< int 
>',['../structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20_3e',['is_integral< long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20long_20long_20_3e',['is_integral< long long >',['../structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20short_20_3e',['is_integral< short >',['../structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20signed_20char_20_3e',['is_integral< signed char >',['../structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20char_20_3e',['is_integral< unsigned char >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20int_20_3e',['is_integral< unsigned int >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20_3e',['is_integral< unsigned long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20long_20long_20_3e',['is_integral< unsigned long long >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20unsigned_20short_20_3e',['is_integral< unsigned short >',['../structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html',1,'cutlass::platform']]], + ['is_5fintegral_3c_20volatile_20t_20_3e',['is_integral< volatile T >',['../structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['is_5fpointer',['is_pointer',['../structcutlass_1_1platform_1_1is__pointer.html',1,'cutlass::platform']]], + 
['is_5fpointer_5fhelper',['is_pointer_helper',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_pointer_helper< remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__pointer__helper.html',1,'cutlass::platform']]], + ['is_5fpointer_5fhelper_3c_20t_20_2a_20_3e',['is_pointer_helper< T * >',['../structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html',1,'cutlass::platform']]], + ['is_5fpow2',['is_pow2',['../structcutlass_1_1is__pow2.html',1,'cutlass']]], + ['is_5fsame',['is_same',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20a_2c_20a_20_3e',['is_same< A, A >',['../structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html',1,'cutlass::platform']]], + ['is_5fsame_3c_20void_2c_20remove_5fcv_3c_20t_20_3e_3a_3atype_20_3e',['is_same< void, remove_cv< T >::type >',['../structcutlass_1_1platform_1_1is__same.html',1,'cutlass::platform']]], + ['is_5ftrivially_5fcopyable',['is_trivially_copyable',['../structcutlass_1_1platform_1_1is__trivially__copyable.html',1,'cutlass::platform']]], + ['is_5fvoid',['is_void',['../structcutlass_1_1platform_1_1is__void.html',1,'cutlass::platform']]], + ['is_5fvolatile',['is_volatile',['../structcutlass_1_1platform_1_1is__volatile.html',1,'cutlass::platform']]], + ['is_5fvolatile_3c_20volatile_20t_20_3e',['is_volatile< volatile T >',['../structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html',1,'cutlass::PredicateVector']]], + ['iteratoradvance',['IteratorAdvance',['../structcutlass_1_1IteratorAdvance.html',1,'cutlass']]], + ['iteratorfragment',['IteratorFragment',['../structcutlass_1_1IteratorFragment.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_9.html b/docs/generated-html/search/classes_9.html new file mode 100644 
index 0000000000..f830ae04b4 --- /dev/null +++ b/docs/generated-html/search/classes_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_9.js b/docs/generated-html/search/classes_9.js new file mode 100644 index 0000000000..cf0d35f6dc --- /dev/null +++ b/docs/generated-html/search/classes_9.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['less',['less',['../structcutlass_1_1platform_1_1less.html',1,'cutlass::platform']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html',1,'cutlass::gemm']]], + ['load',['Load',['../structcutlass_1_1Load.html',1,'cutlass']]], + ['load_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['load_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Load< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['log2_5fdown',['log2_down',['../structcutlass_1_1log2__down.html',1,'cutlass']]], + ['log2_5fdown_3c_20n_2c_201_2c_20count_20_3e',['log2_down< N, 1, Count >',['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]], + ['log2_5fup',['log2_up',['../structcutlass_1_1log2__up.html',1,'cutlass']]], + ['log2_5fup_3c_20n_2c_201_2c_20count_20_3e',['log2_up< N, 1, Count >',['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_a.html 
b/docs/generated-html/search/classes_a.html new file mode 100644 index 0000000000..0fd3b7ac36 --- /dev/null +++ b/docs/generated-html/search/classes_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_a.js b/docs/generated-html/search/classes_a.js new file mode 100644 index 0000000000..0556c78d05 --- /dev/null +++ b/docs/generated-html/search/classes_a.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['mainloopsharedstorage',['MainLoopSharedStorage',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + ['matrixlayout',['MatrixLayout',['../structcutlass_1_1MatrixLayout.html',1,'cutlass']]], + ['memoryspace',['MemorySpace',['../structcutlass_1_1MemorySpace.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_b.html b/docs/generated-html/search/classes_b.html new file mode 100644 index 0000000000..886abdfcd0 --- /dev/null +++ b/docs/generated-html/search/classes_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_b.js b/docs/generated-html/search/classes_b.js new file mode 100644 index 0000000000..02d4e0ba88 --- /dev/null +++ b/docs/generated-html/search/classes_b.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['nullptr_5ft',['nullptr_t',['../structcutlass_1_1platform_1_1nullptr__t.html',1,'cutlass::platform']]] +]; diff --git a/docs/generated-html/search/classes_c.html b/docs/generated-html/search/classes_c.html new file mode 100644 index 0000000000..52ec267634 --- /dev/null +++ b/docs/generated-html/search/classes_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_c.js b/docs/generated-html/search/classes_c.js new file mode 100644 index 0000000000..5b7a2f9977 --- /dev/null +++ b/docs/generated-html/search/classes_c.js @@ -0,0 +1,15 @@ +var searchData= +[ + ['pad',['pad',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html',1,'cutlass::platform::alignment_of']]], + ['params',['Params',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params'],['../structcutlass_1_1TileIteratorBase_1_1Params.html',1,'cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params'],['../structcutlass_1_1TileLoadIterator_1_1Params.html',1,'cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1TileStoreIterator_1_1Params.html',1,'cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ 
>::Params'],['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html',1,'cutlass::gemm::Gemm< GemmTraits_ >::Params'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html',1,'cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html',1,'cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params']]], + ['plus',['plus',['../structcutlass_1_1platform_1_1plus.html',1,'cutlass::platform']]], + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html',1,'cutlass']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20base_3a_3aiterations_3a_3akw_20_3e',['PredicateVector< Base::Iterations::kW >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['predicatevector_3c_20shapecount_3c_20typename_20base_3a_3aiterations_20_3e_3a_3akcount_20_3e',['PredicateVector< ShapeCount< typename Base::Iterations >::kCount >',['../structcutlass_1_1PredicateVector.html',1,'cutlass']]], + ['projectoperand',['ProjectOperand',['../structcutlass_1_1gemm_1_1ProjectOperand.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3aka_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kA, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akb_2c_20kstrided_20_3e',['ProjectOperand< GemmOperand::kB, Kstrided >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html',1,'cutlass::gemm']]], + ['projectoperand_3c_20gemmoperand_3a_3akc_2c_20true_20_3e',['ProjectOperand< GemmOperand::kC, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html',1,'cutlass::gemm']]], + 
['projectoperand_3c_20gemmoperand_3a_3akd_2c_20true_20_3e',['ProjectOperand< GemmOperand::kD, true >',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html',1,'cutlass::gemm']]] +]; diff --git a/docs/generated-html/search/classes_d.html b/docs/generated-html/search/classes_d.html new file mode 100644 index 0000000000..652508df3d --- /dev/null +++ b/docs/generated-html/search/classes_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_d.js b/docs/generated-html/search/classes_d.js new file mode 100644 index 0000000000..b7e543fbac --- /dev/null +++ b/docs/generated-html/search/classes_d.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['remove_5fconst',['remove_const',['../structcutlass_1_1platform_1_1remove__const.html',1,'cutlass::platform']]], + ['remove_5fconst_3c_20const_20t_20_3e',['remove_const< const T >',['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html',1,'cutlass::platform']]], + ['remove_5fcv',['remove_cv',['../structcutlass_1_1platform_1_1remove__cv.html',1,'cutlass::platform']]], + ['remove_5fvolatile',['remove_volatile',['../structcutlass_1_1platform_1_1remove__volatile.html',1,'cutlass::platform']]], + ['remove_5fvolatile_3c_20volatile_20t_20_3e',['remove_volatile< volatile T >',['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html',1,'cutlass::platform']]], + ['reshapethreads',['ReshapeThreads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html',1,'cutlass::gemm']]], + ['reshapethreads_3c_20tile_5f_2c_20threads_5f_2c_20true_20_3e',['ReshapeThreads< Tile_, Threads_, true >',['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html',1,'cutlass::gemm']]], + ['reshapetile',['ReshapeTile',['../structcutlass_1_1ReshapeTile.html',1,'cutlass']]], + ['reshapetile_3c_20tile_5f_2c_20kaccesssize_5f_2c_20true_20_3e',['ReshapeTile< Tile_, kAccessSize_, true >',['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/classes_e.html b/docs/generated-html/search/classes_e.html new file mode 100644 index 0000000000..7d4e9a5631 --- /dev/null +++ b/docs/generated-html/search/classes_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_e.js b/docs/generated-html/search/classes_e.js new file mode 100644 index 0000000000..82b7413055 --- /dev/null +++ b/docs/generated-html/search/classes_e.js @@ -0,0 +1,34 @@ +var searchData= +[ + ['sgemmconfig',['SgemmConfig',['../structcutlass_1_1gemm_1_1SgemmConfig.html',1,'cutlass::gemm']]], + ['sgemmtraits',['SgemmTraits',['../structcutlass_1_1gemm_1_1SgemmTraits.html',1,'cutlass::gemm']]], + ['shape',['Shape',['../structcutlass_1_1Shape.html',1,'cutlass']]], + ['shapeadd',['ShapeAdd',['../structcutlass_1_1ShapeAdd.html',1,'cutlass']]], + ['shapecount',['ShapeCount',['../structcutlass_1_1ShapeCount.html',1,'cutlass']]], + ['shapediv',['ShapeDiv',['../structcutlass_1_1ShapeDiv.html',1,'cutlass']]], + ['shapemax',['ShapeMax',['../structcutlass_1_1ShapeMax.html',1,'cutlass']]], + ['shapemin',['ShapeMin',['../structcutlass_1_1ShapeMin.html',1,'cutlass']]], + ['shapemul',['ShapeMul',['../structcutlass_1_1ShapeMul.html',1,'cutlass']]], + ['shapescale',['ShapeScale',['../structcutlass_1_1ShapeScale.html',1,'cutlass']]], + ['shapestrides',['ShapeStrides',['../structcutlass_1_1ShapeStrides.html',1,'cutlass']]], + ['shapesub',['ShapeSub',['../structcutlass_1_1ShapeSub.html',1,'cutlass']]], + ['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html',1,'cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >']]], + ['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html',1,'cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, 
Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html',1,'cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage']]], + ['simplifiedgemmepiloguetraits',['SimplifiedGemmEpilogueTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits',['SimplifiedGemmTraits',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraits_3c_20klayouta_5f_2c_20klayoutb_5f_2c_20gemmconfig_5f_2c_20gemmepilogue_3c_20gemmepiloguetraits_5f_20_3e_2c_20index_5f_20_3e',['SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ >',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html',1,'cutlass::gemm']]], + ['simplifiedgemmtraitshelper',['SimplifiedGemmTraitsHelper',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html',1,'cutlass::gemm']]], + ['sqrt_5fest',['sqrt_est',['../structcutlass_1_1sqrt__est.html',1,'cutlass']]], + ['storagetype',['StorageType',['../structcutlass_1_1StorageType.html',1,'cutlass']]], + ['storagetype_3c_201_20_3e',['StorageType< 1 >',['../structcutlass_1_1StorageType_3_011_01_4.html',1,'cutlass']]], + ['storagetype_3c_202_20_3e',['StorageType< 2 >',['../structcutlass_1_1StorageType_3_012_01_4.html',1,'cutlass']]], + ['storagetype_3c_204_20_3e',['StorageType< 4 >',['../structcutlass_1_1StorageType_3_014_01_4.html',1,'cutlass']]], + 
['store',['Store',['../structcutlass_1_1Store.html',1,'cutlass']]], + ['store_3c_20double_2c_202_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< double, 2, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_2016_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 16 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_204_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 4 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html',1,'cutlass']]], + ['store_3c_20scalar_5f_2c_20lanes_5f_2c_20memory_5f_2c_20true_2c_208_20_3e',['Store< Scalar_, Lanes_, Memory_, true, 8 >',['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html',1,'cutlass']]], + ['streamsharedstorage',['StreamSharedStorage',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >'],['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage']]], + ['streamsharedstorage_3c_20globalloadstreama_2c_20sharedloadstreama_20_3e',['StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]], + 
['streamsharedstorage_3c_20globalloadstreamb_2c_20sharedloadstreamb_20_3e',['StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB >',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html',1,'cutlass::gemm::GemmTraits']]] +]; diff --git a/docs/generated-html/search/classes_f.html b/docs/generated-html/search/classes_f.html new file mode 100644 index 0000000000..fa6ed25ee0 --- /dev/null +++ b/docs/generated-html/search/classes_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/classes_f.js b/docs/generated-html/search/classes_f.js new file mode 100644 index 0000000000..72998f5343 --- /dev/null +++ b/docs/generated-html/search/classes_f.js @@ -0,0 +1,24 @@ +var searchData= +[ + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorref_3c_20t_2c_204_20_3e',['TensorRef< T, 4 >',['../classcutlass_1_1TensorRef.html',1,'cutlass']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html',1,'cutlass']]], + ['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20half_2c_20half_2c_20half_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html',1,'cutlass::gemm']]], + ['threadmultiplyadd_3c_20accumulatorsperthread_5f_2c_20threadsperwarp_5f_2c_20int8_5ft_2c_20int8_5ft_2c_20int_20_3e',['ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html',1,'cutlass::gemm']]], + ['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, 
Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html',1,'cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset']]], + ['tiledthreadoffset',['TiledThreadOffset',['../structcutlass_1_1TiledThreadOffset.html',1,'cutlass']]], + 
['tileiteratorbase',['TileIteratorBase',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20advance_5f_2c_20memoryspace_2c_20index_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratorfragment_3a_3akscalar_2c_20shape_3c_200_2c_200_2c_200_2c_200_20_3e_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileiteratorbase_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20iteratoradvance_3a_3akh_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileIteratorBase.html',1,'cutlass']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tileloaditerator_3c_20tiletraits_5f_2c_20tiletraits_5f_3a_3ascalar_2c_20tiletraits_5f_3a_3amultiplicandtraits_3a_3akkstrided_20_3f_20iteratoradvance_3a_3akh_20_3aiteratoradvance_3a_3akw_2c_20memoryspace_3a_3akglobal_2c_20index_5f_20_3e',['TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? 
IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >',['../structcutlass_1_1TileLoadIterator.html',1,'cutlass']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html',1,'cutlass']]], + ['tiletraits',['TileTraits',['../structcutlass_1_1TileTraits.html',1,'cutlass']]], + ['tiletraitscontiguousmajor',['TileTraitsContiguousMajor',['../structcutlass_1_1TileTraitsContiguousMajor.html',1,'cutlass']]], + ['tiletraitsstandard',['TileTraitsStandard',['../structcutlass_1_1TileTraitsStandard.html',1,'cutlass']]], + ['tiletraitsstridemajor',['TileTraitsStrideMajor',['../structcutlass_1_1TileTraitsStrideMajor.html',1,'cutlass']]], + ['tiletraitswarprake',['TileTraitsWarpRake',['../structcutlass_1_1TileTraitsWarpRake.html',1,'cutlass']]], + ['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html',1,'cutlass::PredicateVector']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/close.png b/docs/generated-html/search/close.png new file mode 100644 index 0000000000000000000000000000000000000000..9342d3dfeea7b7c4ee610987e717804b5a42ceb9 GIT binary patch literal 273 zcmV+s0q*{ZP)4(RlMby96)VwnbG{ zbe&}^BDn7x>$<{ck4zAK-=nT;=hHG)kmplIF${xqm8db3oX6wT3bvp`TE@m0cg;b) zBuSL}5?N7O(iZLdAlz@)b)Rd~DnSsSX&P5qC`XwuFwcAYLC+d2>+1(8on;wpt8QIC X2MT$R4iQDd00000NkvXXu0mjfia~GN literal 0 HcmV?d00001 diff --git a/docs/generated-html/search/defines_0.html b/docs/generated-html/search/defines_0.html new file mode 100644 index 0000000000..3bffafa9b8 --- /dev/null +++ b/docs/generated-html/search/defines_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_0.js b/docs/generated-html/search/defines_0.js new file mode 100644 index 0000000000..84111a0222 --- /dev/null +++ b/docs/generated-html/search/defines_0.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['_5f_5fnv_5fstd_5fmax',['__NV_STD_MAX',['../platform_8h.html#abd31f291635329bc15292954f1f01d38',1,'platform.h']]], + ['_5f_5fnv_5fstd_5fmin',['__NV_STD_MIN',['../platform_8h.html#a39e234a3e3b0018b58df720bcb143420',1,'platform.h']]], + ['_5f_5fplatform_5fcat',['__platform_cat',['../platform_8h.html#aece7fe71be5aaf8d12dc9e2372f97de4',1,'platform.h']]], + ['_5f_5fplatform_5fcat_5f',['__platform_cat_',['../platform_8h.html#acd148999a5caeba8f6fd52e7e288e659',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/defines_1.html b/docs/generated-html/search/defines_1.html new file mode 100644 index 0000000000..ca5bb94e30 --- /dev/null +++ b/docs/generated-html/search/defines_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_1.js b/docs/generated-html/search/defines_1.js new file mode 100644 index 0000000000..f59274bcf9 --- /dev/null +++ b/docs/generated-html/search/defines_1.js @@ -0,0 +1,17 @@ +var searchData= +[ + ['constexpr',['constexpr',['../platform_8h.html#a72f0657181cca64b44eb186b707eb380',1,'platform.h']]], + ['cuda_5flog',['CUDA_LOG',['../debug_8h.html#a27e3466bcf1ec7fda4f6f95aa0a51177',1,'debug.h']]], + ['cuda_5flog_5fdebug',['CUDA_LOG_DEBUG',['../debug_8h.html#a8d6986db819719ada8b29d53dfc104a6',1,'debug.h']]], + ['cuda_5fperror',['CUDA_PERROR',['../debug_8h.html#aed8337b88d71895f95f8980ef0b3a50b',1,'debug.h']]], + ['cuda_5fperror_5fdebug',['CUDA_PERROR_DEBUG',['../debug_8h.html#a36436f5408940a47ac5cdfc9b31648db',1,'debug.h']]], + ['cuda_5fperror_5fexit',['CUDA_PERROR_EXIT',['../debug_8h.html#a002632ff687c83cff0484476be401f05',1,'debug.h']]], + ['cutlass_5fassert',['CUTLASS_ASSERT',['../cutlass_8h.html#a0159b8e4cd578881a1ccfd0921516af7',1,'cutlass.h']]], + ['cutlass_5fhost_5fdevice',['CUTLASS_HOST_DEVICE',['../cutlass_8h.html#a28c2443a142676d3d71effdae1a986b1',1,'cutlass.h']]], + ['cutlass_5fmajor',['CUTLASS_MAJOR',['../cutlass_8h.html#a8ff3cda9323810c1c504793a0206d4b8',1,'cutlass.h']]], + ['cutlass_5fminor',['CUTLASS_MINOR',['../cutlass_8h.html#ad114a1ab01f73833ea00020ffb7bcea7',1,'cutlass.h']]], + ['cutlass_5fpatch',['CUTLASS_PATCH',['../cutlass_8h.html#a1d4e5818a594bbfc472e54978955cb8b',1,'cutlass.h']]], + ['cutlass_5fpragma_5fno_5funroll',['CUTLASS_PRAGMA_NO_UNROLL',['../cutlass_8h.html#adb3bc73d74b4a4bf13099d5696db3352',1,'cutlass.h']]], + ['cutlass_5fpragma_5funroll',['CUTLASS_PRAGMA_UNROLL',['../cutlass_8h.html#a4b1c9f25ab6eaa25e1f2258dd63e6ce4',1,'cutlass.h']]], + ['cutlass_5fversion',['CUTLASS_VERSION',['../cutlass_8h.html#aa3040eddf073214969f9445bfa925039',1,'cutlass.h']]] +]; diff --git a/docs/generated-html/search/defines_2.html b/docs/generated-html/search/defines_2.html new file mode 100644 index 
0000000000..7cc1a74c0a --- /dev/null +++ b/docs/generated-html/search/defines_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_2.js b/docs/generated-html/search/defines_2.js new file mode 100644 index 0000000000..6b3f8ea5db --- /dev/null +++ b/docs/generated-html/search/defines_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['noexcept',['noexcept',['../platform_8h.html#a189faadd7f99f6c354db09acbb2aafcd',1,'platform.h']]], + ['nullptr',['nullptr',['../platform_8h.html#ab979d9d4b4923f7c54d6caa6e1a61936',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/defines_3.html b/docs/generated-html/search/defines_3.html new file mode 100644 index 0000000000..3d0ac12317 --- /dev/null +++ b/docs/generated-html/search/defines_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/defines_3.js b/docs/generated-html/search/defines_3.js new file mode 100644 index 0000000000..72e85b8534 --- /dev/null +++ b/docs/generated-html/search/defines_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['static_5fassert',['static_assert',['../platform_8h.html#adde4c9ea91b753491851361a4198c009',1,'platform.h']]] +]; diff --git a/docs/generated-html/search/enums_0.html b/docs/generated-html/search/enums_0.html new file mode 100644 index 0000000000..9efcd1b75e --- /dev/null +++ b/docs/generated-html/search/enums_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enums_0.js b/docs/generated-html/search/enums_0.js new file mode 100644 index 0000000000..73c94c1fec --- /dev/null +++ b/docs/generated-html/search/enums_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['kind',['Kind',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375',1,'cutlass::Identity::Kind()'],['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03c',1,'cutlass::MemorySpace::Kind()'],['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2b',1,'cutlass::MatrixLayout::Kind()'],['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0c',1,'cutlass::GemmOperand::Kind()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738dda',1,'cutlass::IteratorAdvance::Kind()'],['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80',1,'cutlass::IteratorFragment::Kind()']]] +]; diff --git a/docs/generated-html/search/enumvalues_0.html b/docs/generated-html/search/enumvalues_0.html new file mode 100644 index 0000000000..03fdfad99d --- /dev/null +++ b/docs/generated-html/search/enumvalues_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_0.js b/docs/generated-html/search/enumvalues_0.js new file mode 100644 index 0000000000..f5435725f6 --- /dev/null +++ b/docs/generated-html/search/enumvalues_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['additive',['Additive',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375a77d7cc80ec0c3ff42ca9b2aff98a1646',1,'cutlass::Identity']]] +]; diff --git a/docs/generated-html/search/enumvalues_1.html b/docs/generated-html/search/enumvalues_1.html new file mode 100644 index 0000000000..abeea56418 --- /dev/null +++ b/docs/generated-html/search/enumvalues_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_1.js b/docs/generated-html/search/enumvalues_1.js new file mode 100644 index 0000000000..b06592d018 --- /dev/null +++ b/docs/generated-html/search/enumvalues_1.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['ka',['kA',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0cac2b9fe9e3679a059d1a6c946b2a2c31a',1,'cutlass::GemmOperand']]], + ['kb',['kB',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0caad0876342d150cef7da6ae149d5e99f9',1,'cutlass::GemmOperand']]], + ['kc',['kC',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca7598e104da2001a76ec344f1c1b9c6dc',1,'cutlass::GemmOperand']]], + ['kcolumnmajor',['kColumnMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2bac15988acba79c11072d38b295f163a2b',1,'cutlass::MatrixLayout']]], + ['kd',['kD',['../structcutlass_1_1GemmOperand.html#ab209ea3de198efabe8e8707dfe8e0a0ca49eef82461e44c96462f9c4dbaab71fe',1,'cutlass::GemmOperand::kD()'],['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa56ecb02f4ed3bd7ae4a9c971805ee8c5',1,'cutlass::IteratorAdvance::kD()']]], + ['kgeneric',['kGeneric',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca21a44c0b78017acea0d1ffe223e5ca38',1,'cutlass::MemorySpace']]], + ['kglobal',['kGlobal',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03cac4bd4070cc396d698beb7ca2e3bbff37',1,'cutlass::MemorySpace']]], + ['kh',['kH',['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaacfe756fca665eb1bbf389850915c1b81',1,'cutlass::IteratorAdvance']]], + ['klanes',['kLanes',['../unioncutlass_1_1Vector.html#a824f9ab976c8e7f035236af03e5ae839a605c5e987bc7b08d743f29a6524abb27',1,'cutlass::Vector::kLanes()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#aa70d2fd36f00b63d321c1f7b6d6c3024ad242b575673ca1bf9cf311e58a966392',1,'cutlass::Vector< half, kLanes_ >::kLanes()']]], + 
['krequiresloadfence',['kRequiresLoadFence',['../structcutlass_1_1TileLoadIterator.html#a1f3601c595f12e7083919ece9b1ec84eaee9d9d6cea8079c32c9383bde45161fc',1,'cutlass::TileLoadIterator']]], + ['krowmajor',['kRowMajor',['../structcutlass_1_1MatrixLayout.html#a97ef07af21b122c1804245b0c7784d2ba6a287c17f9f5bf53528ae68296beeedb',1,'cutlass::MatrixLayout']]], + ['kscalar',['kScalar',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80aeca44a186befa21ccae44eb4dc7b6954',1,'cutlass::IteratorFragment']]], + ['kshared',['kShared',['../structcutlass_1_1MemorySpace.html#a1e031ec41668015a8fe4ba2c1145d03ca2804339b2be64ff68ae3042073aaa7cc',1,'cutlass::MemorySpace']]], + ['kvectorsize',['kVectorSize',['../unioncutlass_1_1Vector.html#abf0c16b6f9cb8439835ebdb271d58763afaf4b62c6bcafbf961c5570364a0316e',1,'cutlass::Vector::kVectorSize()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#adc4140a7e40be1e4f81c78a657c7ba73abfbb3cf98db2f8af7150efb91cac4e79',1,'cutlass::Vector< half, kLanes_ >::kVectorSize()']]], + ['kw',['kW',['../structcutlass_1_1IteratorAdvance.html#a9ad9c2302ddffa148d47cdcf6c738ddaa567e61af8a3401d302f3a3ab26418df0',1,'cutlass::IteratorAdvance']]], + ['kwmmamatrix',['kWmmaMatrix',['../structcutlass_1_1IteratorFragment.html#ae7b6a9ac856eca8b8e437305fa716a80a21d2b2793bab0d348df40715b8f14419',1,'cutlass::IteratorFragment']]] +]; diff --git a/docs/generated-html/search/enumvalues_2.html b/docs/generated-html/search/enumvalues_2.html new file mode 100644 index 0000000000..90289986a1 --- /dev/null +++ b/docs/generated-html/search/enumvalues_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_2.js b/docs/generated-html/search/enumvalues_2.js new file mode 100644 index 0000000000..d96aad12a1 --- /dev/null +++ b/docs/generated-html/search/enumvalues_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['multiplicative',['Multiplicative',['../structcutlass_1_1Identity.html#a37966282c824c6d0e32b432275ea8375af0cc1d8a713958a86af1063595604597',1,'cutlass::Identity']]] +]; diff --git a/docs/generated-html/search/enumvalues_3.html b/docs/generated-html/search/enumvalues_3.html new file mode 100644 index 0000000000..b152efcb57 --- /dev/null +++ b/docs/generated-html/search/enumvalues_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/enumvalues_3.js b/docs/generated-html/search/enumvalues_3.js new file mode 100644 index 0000000000..254df8ef6e --- /dev/null +++ b/docs/generated-html/search/enumvalues_3.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['value',['value',['../structcutlass_1_1log2__down.html#a793565cd891559fab765455e847171dca23d1b50f2f02e1026d4b5dc7ebd6880d',1,'cutlass::log2_down::value()'],['../structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html#ad7d3c2329ab708bd4af36ffaee8509cba282c4c5d8f66dc49544f34071f148b1f',1,'cutlass::log2_down< N, 1, Count >::value()'],['../structcutlass_1_1log2__up.html#a5826002505544547d0c5cc311c2338e3a09591054a7c9b184769d579c56dd09d6',1,'cutlass::log2_up::value()'],['../structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html#ab001737f02df0a2c514334a1bfa6f1f9a6b6af5b6bf14ee5d3e3f1442e7f75117',1,'cutlass::log2_up< N, 1, Count >::value()'],['../structcutlass_1_1sqrt__est.html#abe44577e3d8f34fc07bb9ecf89b25b11a2e73d046302be2504f50c08d788e9964',1,'cutlass::sqrt_est::value()'],['../structcutlass_1_1divide__assert.html#a20e8b8a803c6b5cfe636724760442e33ab924a64662c2eb917b1dd4ca31fdd2dc',1,'cutlass::divide_assert::value()'],['../structcutlass_1_1platform_1_1alignment__of.html#aa1d40937d3536b68e90c580765821389aa36284864bc3d1f73d3bf73cd8da7c83',1,'cutlass::platform::alignment_of::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html#a6005c446eb41749276e0114b82abd990a5b0129d0f9bb45f1c56506efbbb22b6f',1,'cutlass::platform::alignment_of< int4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html#ac55e0c5a0bc4c95981744e55ee7580cea807729922944eede573430b20ad4b322',1,'cutlass::platform::alignment_of< uint4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html#ac9e709c32271b14b35c9607c64835a95a6a6ee3f24f4d123fc7c138fe5b776f2e',1,'cutlass::platform::alignment_of< float4 
>::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html#ad58512f76f0b9b000d48f1ff869a0547a3d020dd8ba5c735a60d7c2c897e158f5',1,'cutlass::platform::alignment_of< long4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html#adc0eec628649de183fe984bb46898830a8152a79c27d055dc3d0b8d662c0bc96a',1,'cutlass::platform::alignment_of< ulong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html#aadf6522691db02f1aab22c22716f0793a940fa73dc4f0a49b78e4e0cefaf4775d',1,'cutlass::platform::alignment_of< longlong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html#a511f088278b3de04feb55ab60bdc5a09a58b5cc7be52956c43c2966af5887db80',1,'cutlass::platform::alignment_of< ulonglong2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html#a5fb114d264023728cca5364401bd6929a7b89d57c8009e094f69ff57e196d8318',1,'cutlass::platform::alignment_of< double2 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html#a666c4fd30155873e3499f5cdc11782daafc1a7c2bb5e6483d42d380a2b4fd9561',1,'cutlass::platform::alignment_of< longlong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html#a2568c1ab218cab6505bd20e3c2c420ffa54f6e1afec0ed30b18ab79fd6faf81b5',1,'cutlass::platform::alignment_of< ulonglong4 >::value()'],['../structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html#a024eaf40a8f3e8bd38b416868e0c68bca5a60b16666306472e92ad1320473ba85',1,'cutlass::platform::alignment_of< double4 >::value()']]] +]; diff --git a/docs/generated-html/search/files_0.html b/docs/generated-html/search/files_0.html new file mode 100644 index 0000000000..49606c82c6 --- /dev/null +++ b/docs/generated-html/search/files_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_0.js b/docs/generated-html/search/files_0.js new file mode 100644 index 0000000000..0ba05c30ad --- /dev/null +++ b/docs/generated-html/search/files_0.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['clear_5faccumulators_2eh',['clear_accumulators.h',['../clear__accumulators_8h.html',1,'']]], + ['convert_2eh',['convert.h',['../convert_8h.html',1,'']]], + ['coord_2eh',['coord.h',['../coord_8h.html',1,'']]], + ['core_5fio_2eh',['core_io.h',['../core__io_8h.html',1,'']]], + ['cutlass_2eh',['cutlass.h',['../cutlass_8h.html',1,'']]], + ['cutlass_5fmath_2eh',['cutlass_math.h',['../cutlass__math_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_1.html b/docs/generated-html/search/files_1.html new file mode 100644 index 0000000000..c8871748e2 --- /dev/null +++ b/docs/generated-html/search/files_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_1.js b/docs/generated-html/search/files_1.js new file mode 100644 index 0000000000..e0b1e0e043 --- /dev/null +++ b/docs/generated-html/search/files_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['debug_2eh',['debug.h',['../debug_8h.html',1,'']]], + ['dgemm_5ftraits_2eh',['dgemm_traits.h',['../dgemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_2.html b/docs/generated-html/search/files_2.html new file mode 100644 index 0000000000..99bdf21c8c --- /dev/null +++ b/docs/generated-html/search/files_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_2.js b/docs/generated-html/search/files_2.js new file mode 100644 index 0000000000..a34177fed4 --- /dev/null +++ b/docs/generated-html/search/files_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['fragment_2eh',['fragment.h',['../fragment_8h.html',1,'']]], + ['fragment_5fload_5fstore_2eh',['fragment_load_store.h',['../fragment__load__store_8h.html',1,'']]], + ['fragment_5fmultiply_5fadd_2eh',['fragment_multiply_add.h',['../fragment__multiply__add_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_3.html b/docs/generated-html/search/files_3.html new file mode 100644 index 0000000000..f8e543a84b --- /dev/null +++ b/docs/generated-html/search/files_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_3.js b/docs/generated-html/search/files_3.js new file mode 100644 index 0000000000..0c2ade3e70 --- /dev/null +++ b/docs/generated-html/search/files_3.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['gemm_2eh',['gemm.h',['../gemm_8h.html',1,'']]], + ['gemm_5fepilogue_2eh',['gemm_epilogue.h',['../gemm__epilogue_8h.html',1,'']]], + ['gemm_5fepilogue_5ftraits_2eh',['gemm_epilogue_traits.h',['../gemm__epilogue__traits_8h.html',1,'']]], + ['gemm_5fglobal_5fstream_2eh',['gemm_global_stream.h',['../gemm__global__stream_8h.html',1,'']]], + ['gemm_5fglobal_5ftile_2eh',['gemm_global_tile.h',['../gemm__global__tile_8h.html',1,'']]], + ['gemm_5foperand_2eh',['gemm_operand.h',['../gemm__operand_8h.html',1,'']]], + ['gemm_5fshared_5fstream_2eh',['gemm_shared_stream.h',['../gemm__shared__stream_8h.html',1,'']]], + ['gemm_5fshared_5ftile_2eh',['gemm_shared_tile.h',['../gemm__shared__tile_8h.html',1,'']]], + ['gemm_5ftraits_2eh',['gemm_traits.h',['../gemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_4.html b/docs/generated-html/search/files_4.html new file mode 100644 index 0000000000..2ebb46c7e7 --- /dev/null +++ b/docs/generated-html/search/files_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_4.js b/docs/generated-html/search/files_4.js new file mode 100644 index 0000000000..991070fd5b --- /dev/null +++ b/docs/generated-html/search/files_4.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['hgemm_5fglobal_5ftile_2eh',['hgemm_global_tile.h',['../hgemm__global__tile_8h.html',1,'']]], + ['hgemm_5fmultiply_5fadd_2eh',['hgemm_multiply_add.h',['../hgemm__multiply__add_8h.html',1,'']]], + ['hgemm_5fswizzle_2eh',['hgemm_swizzle.h',['../hgemm__swizzle_8h.html',1,'']]], + ['hgemm_5ftraits_2eh',['hgemm_traits.h',['../hgemm__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_5.html b/docs/generated-html/search/files_5.html new file mode 100644 index 0000000000..268b7eb539 --- /dev/null +++ b/docs/generated-html/search/files_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_5.js b/docs/generated-html/search/files_5.js new file mode 100644 index 0000000000..6e82d0ebed --- /dev/null +++ b/docs/generated-html/search/files_5.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['identity_5fblock_5fswizzle_2eh',['identity_block_swizzle.h',['../identity__block__swizzle_8h.html',1,'']]], + ['igemm_5fepilogue_2eh',['igemm_epilogue.h',['../igemm__epilogue_8h.html',1,'']]], + ['igemm_5fglobal_5ftile_2eh',['igemm_global_tile.h',['../igemm__global__tile_8h.html',1,'']]], + ['igemm_5fmultiply_5fadd_2eh',['igemm_multiply_add.h',['../igemm__multiply__add_8h.html',1,'']]], + ['igemm_5fswizzle_2eh',['igemm_swizzle.h',['../igemm__swizzle_8h.html',1,'']]], + ['igemm_5ftraits_2eh',['igemm_traits.h',['../igemm__traits_8h.html',1,'']]], + ['iterator_5faccess_2eh',['iterator_access.h',['../iterator__access_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_6.html b/docs/generated-html/search/files_6.html new file mode 100644 index 0000000000..98fc6666c2 --- /dev/null +++ b/docs/generated-html/search/files_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_6.js b/docs/generated-html/search/files_6.js new file mode 100644 index 0000000000..20c8aded98 --- /dev/null +++ b/docs/generated-html/search/files_6.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['linear_5fscaling_2eh',['linear_scaling.h',['../linear__scaling_8h.html',1,'']]], + ['load_5fstore_2eh',['load_store.h',['../load__store_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_7.html b/docs/generated-html/search/files_7.html new file mode 100644 index 0000000000..49507dedbd --- /dev/null +++ b/docs/generated-html/search/files_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_7.js b/docs/generated-html/search/files_7.js new file mode 100644 index 0000000000..c9a077bce4 --- /dev/null +++ b/docs/generated-html/search/files_7.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['matrix_5ftraits_2eh',['matrix_traits.h',['../matrix__traits_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_8.html b/docs/generated-html/search/files_8.html new file mode 100644 index 0000000000..12c6630be3 --- /dev/null +++ b/docs/generated-html/search/files_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_8.js b/docs/generated-html/search/files_8.js new file mode 100644 index 0000000000..b0cdc6b74a --- /dev/null +++ b/docs/generated-html/search/files_8.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['platform_2eh',['platform.h',['../platform_8h.html',1,'']]], + ['predicate_5fvector_2eh',['predicate_vector.h',['../predicate__vector_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_9.html b/docs/generated-html/search/files_9.html new file mode 100644 index 0000000000..cabcae2f88 --- /dev/null +++ b/docs/generated-html/search/files_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_9.js b/docs/generated-html/search/files_9.js new file mode 100644 index 0000000000..e3c39a100a --- /dev/null +++ b/docs/generated-html/search/files_9.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['reshape_5ftile_2eh',['reshape_tile.h',['../reshape__tile_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_a.html b/docs/generated-html/search/files_a.html new file mode 100644 index 0000000000..f7402215c5 --- /dev/null +++ b/docs/generated-html/search/files_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_a.js b/docs/generated-html/search/files_a.js new file mode 100644 index 0000000000..5ee4742487 --- /dev/null +++ b/docs/generated-html/search/files_a.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['sgemm_5ftraits_2eh',['sgemm_traits.h',['../sgemm__traits_8h.html',1,'']]], + ['shape_2eh',['shape.h',['../shape_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_b.html b/docs/generated-html/search/files_b.html new file mode 100644 index 0000000000..7be1003070 --- /dev/null +++ b/docs/generated-html/search/files_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_b.js b/docs/generated-html/search/files_b.js new file mode 100644 index 0000000000..f443994e64 --- /dev/null +++ b/docs/generated-html/search/files_b.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['tensor_5fref_2eh',['tensor_ref.h',['../tensor__ref_8h.html',1,'']]], + ['tensor_5fview_2eh',['tensor_view.h',['../tensor__view_8h.html',1,'']]], + ['thread_5fmultiply_5fadd_2eh',['thread_multiply_add.h',['../thread__multiply__add_8h.html',1,'']]], + ['tile_5fiterator_2eh',['tile_iterator.h',['../tile__iterator_8h.html',1,'']]], + ['tile_5ftraits_5fstandard_2eh',['tile_traits_standard.h',['../tile__traits__standard_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_c.html b/docs/generated-html/search/files_c.html new file mode 100644 index 0000000000..c769b49bd6 --- /dev/null +++ b/docs/generated-html/search/files_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_c.js b/docs/generated-html/search/files_c.js new file mode 100644 index 0000000000..4edbbe3921 --- /dev/null +++ b/docs/generated-html/search/files_c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['vector_2eh',['vector.h',['../vector_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_d.html b/docs/generated-html/search/files_d.html new file mode 100644 index 0000000000..54e39775bd --- /dev/null +++ b/docs/generated-html/search/files_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_d.js b/docs/generated-html/search/files_d.js new file mode 100644 index 0000000000..732797da7c --- /dev/null +++ b/docs/generated-html/search/files_d.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/files_e.html b/docs/generated-html/search/files_e.html new file mode 100644 index 0000000000..febb549419 --- /dev/null +++ b/docs/generated-html/search/files_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/files_e.js b/docs/generated-html/search/files_e.js new file mode 100644 index 0000000000..732797da7c --- /dev/null +++ b/docs/generated-html/search/files_e.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['wmma_5fgemm_5fepilogue_5ftraits_2eh',['wmma_gemm_epilogue_traits.h',['../wmma__gemm__epilogue__traits_8h.html',1,'']]], + ['wmma_5fgemm_5fglobal_5ftile_2eh',['wmma_gemm_global_tile.h',['../wmma__gemm__global__tile_8h.html',1,'']]], + ['wmma_5fgemm_5fmultiply_5fadd_2eh',['wmma_gemm_multiply_add.h',['../wmma__gemm__multiply__add_8h.html',1,'']]], + ['wmma_5fgemm_5fshared_5ftile_2eh',['wmma_gemm_shared_tile.h',['../wmma__gemm__shared__tile_8h.html',1,'']]], + ['wmma_5fgemm_5ftraits_2eh',['wmma_gemm_traits.h',['../wmma__gemm__traits_8h.html',1,'']]], + ['wmma_5fmatrix_2eh',['wmma_matrix.h',['../wmma__matrix_8h.html',1,'']]] +]; diff --git a/docs/generated-html/search/functions_0.html b/docs/generated-html/search/functions_0.html new file mode 100644 index 0000000000..0539c8cede --- /dev/null +++ b/docs/generated-html/search/functions_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_0.js b/docs/generated-html/search/functions_0.js new file mode 100644 index 0000000000..9bb68b7cf6 --- /dev/null +++ b/docs/generated-html/search/functions_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_5f_5falign_5f_5f',['__align__',['../namespacecutlass_1_1platform.html#ac9068e2d027ffdf5cd564deecc2cb9e8',1,'cutlass::platform::__align__(1) aligned_chunk< 1 >'],['../namespacecutlass_1_1platform.html#a0bcb016704ec57f9499e662ba6156f98',1,'cutlass::platform::__align__(2) aligned_chunk< 2 >'],['../namespacecutlass_1_1platform.html#a71be5af25eeffa4077777f919e67d8da',1,'cutlass::platform::__align__(4) aligned_chunk< 4 >'],['../namespacecutlass_1_1platform.html#a42440254a16d4b6b95b95cc3360ee372',1,'cutlass::platform::__align__(8) aligned_chunk< 8 >'],['../namespacecutlass_1_1platform.html#a91d5e970d6ebe619914f40a9510bdb1e',1,'cutlass::platform::__align__(16) aligned_chunk< 16 >'],['../namespacecutlass_1_1platform.html#a210f4d360b1f9c3d074e71129fe4c0d9',1,'cutlass::platform::__align__(32) aligned_chunk< 32 >'],['../namespacecutlass_1_1platform.html#ae792b1c7ada1a33e306cd552f583bdce',1,'cutlass::platform::__align__(64) aligned_chunk< 64 >'],['../namespacecutlass_1_1platform.html#a5712ec4fed335a9b7f863fb3abe3c5eb',1,'cutlass::platform::__align__(128) aligned_chunk< 128 >'],['../namespacecutlass_1_1platform.html#a595cc98db29fb4d59772d2e2f52e347a',1,'cutlass::platform::__align__(256) aligned_chunk< 256 >'],['../namespacecutlass_1_1platform.html#ae70bb5d14a66500b47d2e3f83063d4a5',1,'cutlass::platform::__align__(512) aligned_chunk< 512 >'],['../namespacecutlass_1_1platform.html#a181e44e9c66f704175590727aaa9e5a1',1,'cutlass::platform::__align__(1024) aligned_chunk< 1024 >'],['../namespacecutlass_1_1platform.html#ae72c8fa997bb251d4140dceb03147154',1,'cutlass::platform::__align__(2048) aligned_chunk< 2048 >'],['../namespacecutlass_1_1platform.html#ada29683f1b408ae7b73cc8fbe2108628',1,'cutlass::platform::__align__(4096) 
aligned_chunk< 4096 >'],['../namespacecutlass.html#ae6ee3d9361526f859d737d9c68c13706',1,'cutlass::__align__(1) AlignedStruct< 1 >'],['../namespacecutlass.html#a602227fad962270da185209ecc6012f2',1,'cutlass::__align__(2) AlignedStruct< 2 >'],['../namespacecutlass.html#a266d7d2ae6e79537e46ee37b4fdface7',1,'cutlass::__align__(4) AlignedStruct< 4 >'],['../namespacecutlass.html#a1101e01215ddb0e5a7b120a4541a3c4e',1,'cutlass::__align__(8) AlignedStruct< 8 >'],['../namespacecutlass.html#aa4071cf5103f352a5100d9b4bba895e2',1,'cutlass::__align__(16) AlignedStruct< 16 >'],['../namespacecutlass.html#ada65694bdd4b70d4c9d769a536275a47',1,'cutlass::__align__(32) AlignedStruct< 32 >'],['../namespacecutlass.html#aa80a7cb3febd19b96f2ecbcb610b1b9e',1,'cutlass::__align__(64) AlignedStruct< 64 >']]] +]; diff --git a/docs/generated-html/search/functions_1.html b/docs/generated-html/search/functions_1.html new file mode 100644 index 0000000000..4878b3d12d --- /dev/null +++ b/docs/generated-html/search/functions_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_1.js b/docs/generated-html/search/functions_1.js new file mode 100644 index 0000000000..8b50e666c4 --- /dev/null +++ b/docs/generated-html/search/functions_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['advance',['advance',['../classcutlass_1_1TensorRef.html#aab0dafb81a462320e55e0dc4a5886478',1,'cutlass::TensorRef']]], + ['at',['at',['../structcutlass_1_1Coord.html#ad10b59430927a354fcd874d2d32f1bd8',1,'cutlass::Coord::at()'],['../structcutlass_1_1Coord.html#ab511a16210d1b94449f5bc6476f6a266',1,'cutlass::Coord::at(int dim)'],['../structcutlass_1_1Coord.html#af9cc7ab2088544d1240ac51c4c6e685d',1,'cutlass::Coord::at() const'],['../structcutlass_1_1Coord.html#aed4f4d1c7c0749fe72736d7a1213b6e9',1,'cutlass::Coord::at(int dim) const'],['../structcutlass_1_1FragmentIterator.html#a9cf31df06ff035705a1341810fcdcbf2',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0) const'],['../structcutlass_1_1FragmentIterator.html#a7bdc407aae8d7360e089af347b585a53',1,'cutlass::FragmentIterator::at(int d, int h, int w, int c=0)'],['../structcutlass_1_1FragmentConstIterator.html#a8b957150545becacab1b8ead1be29424',1,'cutlass::FragmentConstIterator::at()'],['../structcutlass_1_1PredicateVector.html#ac8eca7087d1f7575b0c6beeb5f907bfd',1,'cutlass::PredicateVector::at()'],['../structcutlass_1_1TrivialPredicateTileAdapter.html#a3e41ab145489df08fca79251b2253d0f',1,'cutlass::TrivialPredicateTileAdapter::at()'],['../structcutlass_1_1PredicateTileAdapter.html#a7d54e877bca2e840c142293b4826e986',1,'cutlass::PredicateTileAdapter::at()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a9e5651009a7b8df9960527c18c7b05dd',1,'cutlass::ConstPredicateTileAdapter::at()'],['../classcutlass_1_1TensorRef.html#a7eff42a37e4dbee488bfa726f3f0df4f',1,'cutlass::TensorRef::at(Coord< Rank > const &coord) const'],['../classcutlass_1_1TensorRef.html#a5702dea703104ab431c098c7b039c215',1,'cutlass::TensorRef::at(int idx) 
const'],['../classcutlass_1_1TensorView.html#ad894a8b373c413d308cb1b7c7ba545ce',1,'cutlass::TensorView::at(Coord_t const &coord) const'],['../classcutlass_1_1TensorView.html#acc55581896fae8c0449b44b56d750155',1,'cutlass::TensorView::at(Offset_t idx) const']]] +]; diff --git a/docs/generated-html/search/functions_10.html b/docs/generated-html/search/functions_10.html new file mode 100644 index 0000000000..6f6fbae23f --- /dev/null +++ b/docs/generated-html/search/functions_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_10.js b/docs/generated-html/search/functions_10.js new file mode 100644 index 0000000000..e7ab3ee107 --- /dev/null +++ b/docs/generated-html/search/functions_10.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['tensorref',['TensorRef',['../classcutlass_1_1TensorRef.html#a54f6edc293b0b8ac97f02e8ab951c478',1,'cutlass::TensorRef::TensorRef()'],['../classcutlass_1_1TensorRef.html#ae48325312183ff61dbd312c64f31fcb8',1,'cutlass::TensorRef::TensorRef(Storage *ptr, Coord< Rank > stride)']]], + ['tensorview',['TensorView',['../classcutlass_1_1TensorView.html#a22401348796d603546e44d6c196018dc',1,'cutlass::TensorView::TensorView()'],['../classcutlass_1_1TensorView.html#a80480aa986a488a106a9b0aea331c317',1,'cutlass::TensorView::TensorView(TensorRef_t const &_ref, Coord_t const &_size)']]], + ['threadmultiplyadd',['ThreadMultiplyAdd',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#acec155117a56c942c5e695984b0f072d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a9b75e499f4c14369b5c86051dceeb81d',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ab271a3f11ccde4b629ddb11b78c0d555',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadMultiplyAdd()']]], + ['tileloaditerator',['TileLoadIterator',['../structcutlass_1_1TileLoadIterator.html#a81c9c0b17bf5f214230ecf10e0690a4e',1,'cutlass::TileLoadIterator::TileLoadIterator()'],['../structcutlass_1_1TileLoadIterator.html#a93e166575be3b2f7489833ae5da23f23',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), 
ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileLoadIterator.html#a53282fa4cb33cfcec79033d26e418af6',1,'cutlass::TileLoadIterator::TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['tilestoreiterator',['TileStoreIterator',['../structcutlass_1_1TileStoreIterator.html#aac4d49854d63f632627b6974f9b59dbb',1,'cutlass::TileStoreIterator::TileStoreIterator()'],['../structcutlass_1_1TileStoreIterator.html#a037ccd942359e6bc8640a240b13cd330',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())'],['../structcutlass_1_1TileStoreIterator.html#a4f89c5182659de94605300e15c3651b2',1,'cutlass::TileStoreIterator::TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())']]], + ['transform',['transform',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a4dd95354137d3cb52752ecdd346a5685',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#aa9fe67c947bf461ba3e3ca48daa34815',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1Copy.html#ab356f0f473aa3fd8df8fb8ddd8e0e9f3',1,'cutlass::Copy::transform(Fragment_ const &src, Fragment_ &dst)'],['../structcutlass_1_1Copy.html#a171f9a44c05b6fb432b0339979de4eb2',1,'cutlass::Copy::transform(InputFragment_ const &src, int 
offset, Fragment_ &dst)'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ad467ce744bf9d478900fb2661d7a1c26',1,'cutlass::gemm::HgemmSwizzle::transform()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a91ad48362b99a5f96ac1e92e95104f7b',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a819fd33db88a68521108bab2641d73fd',1,'cutlass::gemm::IgemmFloatToInt8Converter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#aca8a61e8eb1ab33b9c61e2e7d342379d',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(InputFragment const &src, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a89e078dbf376da872c3993ccbaf744d3',1,'cutlass::gemm::IgemmInt8ToFloatConverter::transform(Fragment_ const &src, int offset, OutputFragment &dst)'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a084917a512c7a411b76a69f86b906811',1,'cutlass::gemm::IgemmSwizzle::transform()']]], + ['trivialiterator',['TrivialIterator',['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a6cb3664b5cba4280b7055a65ddad7850',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ada8cd3ac6db568bb9bf268ba2c3a3e14',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(Iterator const &it)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a3adf0440f9a0143a61b43d39c3f03721',1,'cutlass::PredicateVector::TrivialIterator::TrivialIterator(PredicateVector const &_vec)']]], + ['trivialpredicatetileadapter',['TrivialPredicateTileAdapter',['../structcutlass_1_1TrivialPredicateTileAdapter.html#a7259853a129a7e319b972d3b41dd59d7',1,'cutlass::TrivialPredicateTileAdapter']]] +]; diff --git a/docs/generated-html/search/functions_11.html b/docs/generated-html/search/functions_11.html new file 
mode 100644 index 0000000000..dd88d8b7e6 --- /dev/null +++ b/docs/generated-html/search/functions_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_11.js b/docs/generated-html/search/functions_11.js new file mode 100644 index 0000000000..a0eb54d4d3 --- /dev/null +++ b/docs/generated-html/search/functions_11.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['unique_5fptr',['unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#aa8a370bc7e4c2d99eb85e7fea27b3179',1,'cutlass::platform::unique_ptr::unique_ptr()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a14c8bf5a5deefe4a6602ccd5c5af364c',1,'cutlass::platform::unique_ptr::unique_ptr(pointer p)']]] +]; diff --git a/docs/generated-html/search/functions_12.html b/docs/generated-html/search/functions_12.html new file mode 100644 index 0000000000..7093d19fe3 --- /dev/null +++ b/docs/generated-html/search/functions_12.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_12.js b/docs/generated-html/search/functions_12.js new file mode 100644 index 0000000000..972cb65272 --- /dev/null +++ b/docs/generated-html/search/functions_12.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['valid',['valid',['../structcutlass_1_1FragmentIterator.html#ab18f8ea676b45831f939715212167a99',1,'cutlass::FragmentIterator::valid()'],['../structcutlass_1_1FragmentConstIterator.html#a01571b2fc566793fd50a10fa82441951',1,'cutlass::FragmentConstIterator::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ac4d2c293f9312b673ea29bf79b2882fd',1,'cutlass::gemm::GemmGlobalIteratorAb::valid()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6594acc213fc8d4289c6c73631f60120',1,'cutlass::gemm::GemmGlobalIteratorCd::valid()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a468f8f503777e4a2b0089ee2bd6c471a',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::valid()'],['../structcutlass_1_1TileIteratorBase.html#af78a2bf3e7507dc7f50343a3c209f770',1,'cutlass::TileIteratorBase::valid()']]] +]; diff --git a/docs/generated-html/search/functions_13.html b/docs/generated-html/search/functions_13.html new file mode 100644 index 0000000000..051a1eb820 --- /dev/null +++ b/docs/generated-html/search/functions_13.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_13.js b/docs/generated-html/search/functions_13.js new file mode 100644 index 0000000000..f2593b4ad0 --- /dev/null +++ b/docs/generated-html/search/functions_13.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['wmmagemmglobaliteratorcd',['WmmaGemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a505f124fa3f47c6d57b7275e81be6dd3',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa5c14e2a799249fe8bba14aa1dbe69dc',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]] +]; diff --git a/docs/generated-html/search/functions_14.html b/docs/generated-html/search/functions_14.html new file mode 100644 index 0000000000..d5fdbda44e --- /dev/null +++ b/docs/generated-html/search/functions_14.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_14.js b/docs/generated-html/search/functions_14.js new file mode 100644 index 0000000000..10f55890b8 --- /dev/null +++ b/docs/generated-html/search/functions_14.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['_7eunique_5fptr',['~unique_ptr',['../classcutlass_1_1platform_1_1unique__ptr.html#a8902399dac4ab64f08f909f2ad9d4bcf',1,'cutlass::platform::unique_ptr']]] +]; diff --git a/docs/generated-html/search/functions_2.html b/docs/generated-html/search/functions_2.html new file mode 100644 index 0000000000..67d2a392cb --- /dev/null +++ b/docs/generated-html/search/functions_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_2.js b/docs/generated-html/search/functions_2.js new file mode 100644 index 0000000000..93a72e5ae6 --- /dev/null +++ b/docs/generated-html/search/functions_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['begin',['begin',['../structcutlass_1_1PredicateVector.html#a649045d8224514a4c28bcaf4b247b4a5',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/functions_3.html b/docs/generated-html/search/functions_3.html new file mode 100644 index 0000000000..1f0eedb334 --- /dev/null +++ b/docs/generated-html/search/functions_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_3.js b/docs/generated-html/search/functions_3.js new file mode 100644 index 0000000000..b9f86bbaa1 --- /dev/null +++ b/docs/generated-html/search/functions_3.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['check',['check',['../structcutlass_1_1platform_1_1is__base__of__helper.html#a5bf08859497e304ca353699ad6ac332b',1,'cutlass::platform::is_base_of_helper::check(DerivedT *, T)'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae8896817cabf297437b3a073e693ffd2',1,'cutlass::platform::is_base_of_helper::check(BaseT *, int)']]], + ['clamp',['clamp',['../structcutlass_1_1Coord.html#a482ada6da62f427987c22098796fcf7e',1,'cutlass::Coord']]], + ['clear',['clear',['../structcutlass_1_1Fragment.html#a29e7408fcde8cdf9de5e3a10eaa46391',1,'cutlass::Fragment::clear()'],['../structcutlass_1_1gemm_1_1ClearAccumulators.html#adb8026a19b09e9a581ec767c2c2da4ab',1,'cutlass::gemm::ClearAccumulators::clear()']]], + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1ClearAccumulators.html#a4ba07ea6d6fef961de1cb95b13c672ef',1,'cutlass::gemm::ClearAccumulators']]], + ['commit',['commit',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6ce2c6e81d159d8e9ab736cb263f44ae',1,'cutlass::gemm::GlobalLoadStreamBase::commit()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9cc435369c7fc76d0bb6233a8258e257',1,'cutlass::gemm::SharedLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a6dc512be014b9d849057e2fd4c0b0485',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::commit()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#ade2d85507dec77591e66276339a1eef5',1,'cutlass::gemm::GemmTraits::SharedLoadStream::commit()']]], + ['const_5fbegin',['const_begin',['../structcutlass_1_1PredicateVector.html#aeb7f9226a4fa49d06500c3c83958dc41',1,'cutlass::PredicateVector']]], + 
['const_5fend',['const_end',['../structcutlass_1_1PredicateVector.html#ab931610bc07ee0e87bb4d9a4d53a2321',1,'cutlass::PredicateVector']]], + ['const_5fref',['const_ref',['../classcutlass_1_1TensorView.html#a23564f1d333bb16343ed3a885f894285',1,'cutlass::TensorView']]], + ['constiterator',['ConstIterator',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a1216aab9c567ec0d4232019008ef3ea7',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(ConstIterator const &it)'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a590e4f4533c87162c0b79e8d876a8fda',1,'cutlass::PredicateVector::ConstIterator::ConstIterator(PredicateVector const &_vec, int _start=0)']]], + ['constpredicatetileadapter',['ConstPredicateTileAdapter',['../structcutlass_1_1ConstPredicateTileAdapter.html#a9abd78d5c3e444bfb23d2b1a08be2be1',1,'cutlass::ConstPredicateTileAdapter']]], + ['contains',['contains',['../classcutlass_1_1TensorView.html#aa94063d9a9c6e599d3f53e22433274be',1,'cutlass::TensorView']]], + ['convert',['Convert',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a593a5a2c48708965e829d242ccb3b99f',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert()'],['../classcutlass_1_1TensorRef.html#a7eb4444e2b3fce5a5ccde65a75df633c',1,'cutlass::TensorRef::convert()']]], + ['coord',['Coord',['../structcutlass_1_1Coord.html#a9cbfff91f0b0d0a149534c97e3d6e69b',1,'cutlass::Coord::Coord(int value=0)'],['../structcutlass_1_1Coord.html#a53a3d88a884f6cb7fda8aedfe2cec2c5',1,'cutlass::Coord::Coord(int _idx[])']]], + 
['copy',['Copy',['../structcutlass_1_1Copy.html#ab2c20f886208396a1779c6d29b56c3f1',1,'cutlass::Copy::Copy()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#af7a15b4456cda01c1ffbb2fdc532e87e',1,'cutlass::gemm::GlobalLoadStreamBase::copy()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a7f6bf3b8d70bcd74d84519decd9f0d8e',1,'cutlass::gemm::SharedLoadStream::copy(FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a279144e9722055d4b862e3fa25948762',1,'cutlass::gemm::SharedLoadStream::copy(int d, FetchedFragment &fetched)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ae033f55779b45b4228f40a4d699062bb',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::copy()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#af25495bb0bb35bd64246d3a80fe4806f',1,'cutlass::gemm::GemmTraits::SharedLoadStream::copy()']]], + ['count',['count',['../structcutlass_1_1Coord.html#a40429a9154f7a142ad7e9eb35282d196',1,'cutlass::Coord']]], + ['cuda_5fperror_5fimpl',['cuda_perror_impl',['../namespacecutlass.html#a6d3dfeb642a2ce3d5f52243fe48f89cc',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/functions_4.html b/docs/generated-html/search/functions_4.html new file mode 100644 index 0000000000..c5bf87a473 --- /dev/null +++ b/docs/generated-html/search/functions_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_4.js b/docs/generated-html/search/functions_4.js new file mode 100644 index 0000000000..7288b40538 --- /dev/null +++ b/docs/generated-html/search/functions_4.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['data',['data',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3af66b82b1a0cc5bf6141f940553e048',1,'cutlass::gemm::GemmGlobalIteratorAb::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a0d3c1a58f23957f9850d1b22992a981a',1,'cutlass::gemm::GemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6fd4e62eb280a5b8c17eb79141414581',1,'cutlass::gemm::GemmGlobalIteratorCd::data() const'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#afe77778a126449e210c0bd6ec2dc6709',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a90e9886534ecbbce69f57b4030d0903f',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::data() const'],['../classcutlass_1_1TensorRef.html#a8e23c78658f45c6f197a1774cc85c5b7',1,'cutlass::TensorRef::data()'],['../classcutlass_1_1TensorView.html#a248e4240ccf96c976254464710a73fc8',1,'cutlass::TensorView::data()'],['../structcutlass_1_1TileLoadIterator.html#afb6320b600f1f561594a9fb543b954e4',1,'cutlass::TileLoadIterator::data()'],['../structcutlass_1_1TileStoreIterator.html#a5ebab59862d5f50ad980871515d999b0',1,'cutlass::TileStoreIterator::data()']]], + ['dot',['dot',['../structcutlass_1_1Coord.html#ad4b3704d14057c043f972827671115cf',1,'cutlass::Coord::dot(Coord const &b, T sum) const'],['../structcutlass_1_1Coord.html#ae023c0c664c22a978e9b9ce5e063aae4',1,'cutlass::Coord::dot(Coord const &b) const']]] +]; diff --git a/docs/generated-html/search/functions_5.html b/docs/generated-html/search/functions_5.html new file mode 100644 index 0000000000..a34446ce73 --- /dev/null +++ b/docs/generated-html/search/functions_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_5.js b/docs/generated-html/search/functions_5.js new file mode 100644 index 0000000000..64953e70f0 --- /dev/null +++ b/docs/generated-html/search/functions_5.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['end',['end',['../structcutlass_1_1PredicateVector.html#ad9493fc80fdc33330cc15641779cc275',1,'cutlass::PredicateVector']]], + ['epilogue',['epilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae1983e37454ed14272b23b964614c54c',1,'cutlass::gemm::GemmEpilogue']]], + ['epilogue_5fwith_5for_5fwithout_5fbeta',['epilogue_with_or_without_beta',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0c24dce365565f75e7edc1de1cb50ea4',1,'cutlass::gemm::GemmEpilogue']]], + ['evaluate',['evaluate',['../structcutlass_1_1gemm_1_1LinearScaling.html#a2e0d140aed388d2457dfb24d28fcd08a',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ &output)'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a47a53e5b67b2207fb3ba38a8b9cef448',1,'cutlass::gemm::LinearScaling::evaluate(Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)']]] +]; diff --git a/docs/generated-html/search/functions_6.html b/docs/generated-html/search/functions_6.html new file mode 100644 index 0000000000..6fd4b1f373 --- /dev/null +++ b/docs/generated-html/search/functions_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_6.js b/docs/generated-html/search/functions_6.js new file mode 100644 index 0000000000..3d4faf6765 --- /dev/null +++ b/docs/generated-html/search/functions_6.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['fill',['fill',['../structcutlass_1_1PredicateVector.html#a236bd1a822479750a809452fd58dd917',1,'cutlass::PredicateVector']]], + ['fragment_5fa',['fragment_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a4a8c64d85aa012e3689dd024c486924b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragment_5fb',['fragment_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#aa28f34fb0c4bf739246d92c2fef80e0b',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1FragmentConstIterator.html#ac4b6f351e6e72bed37e425f02a10c81e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(OtherFragment_ &fragment, int offset=0)'],['../structcutlass_1_1FragmentConstIterator.html#a3a8fd8f13c157ed13dc93fd78036c59e',1,'cutlass::FragmentConstIterator::FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1FragmentIterator.html#ae1825fe3e138e2aa62d27dab2b5227b4',1,'cutlass::FragmentIterator']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af19e14a22aefd1124f7d31beec6f8c42',1,'cutlass::gemm::FragmentMultiplyAdd::FragmentMultiplyAdd()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a21f0965f6178917c7f5c6d79ed048059',1,'cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd()']]] +]; diff --git a/docs/generated-html/search/functions_7.html b/docs/generated-html/search/functions_7.html new file mode 100644 index 0000000000..6e09abf1b2 --- /dev/null +++ b/docs/generated-html/search/functions_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_7.js b/docs/generated-html/search/functions_7.js new file mode 100644 index 0000000000..3ce9c5d824 --- /dev/null +++ b/docs/generated-html/search/functions_7.js @@ -0,0 +1,17 @@ +var searchData= +[ + ['gcd',['gcd',['../namespacecutlass.html#a38481ebfe13bc199aa621ceecfa016b8',1,'cutlass']]], + ['gemm',['Gemm',['../structcutlass_1_1gemm_1_1Gemm.html#a8bff0bd32aec05f8c1e282024be0bcfd',1,'cutlass::gemm::Gemm']]], + ['gemm_5fkernel',['gemm_kernel',['../namespacecutlass_1_1gemm.html#ad9577c9086b0f7fd1202d7f8109e4439',1,'cutlass::gemm']]], + ['gemmepilogue',['GemmEpilogue',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ab10147070c3a38fca75397f55dc51925',1,'cutlass::gemm::GemmEpilogue']]], + ['gemmglobaliteratorab',['GemmGlobalIteratorAb',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a34cb153d311377388e7819296a84d07e',1,'cutlass::gemm::GemmGlobalIteratorAb']]], + ['gemmglobaliteratorcd',['GemmGlobalIteratorCd',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6dae81995ab94c0b7f28eeeeb84a6c8d',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a64f1df43acb37a1901f0b55becaa9557',1,'cutlass::gemm::GemmGlobalIteratorCd::GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())']]], + ['get',['get',['../classcutlass_1_1PredicateVector_1_1Iterator.html#af035589126434bd2dbef4000cd864b8b',1,'cutlass::PredicateVector::Iterator::get()'],['../structcutlass_1_1ComputeOffsetFromShape.html#a3c6f60a59178ffb84899aa449bd51d38',1,'cutlass::ComputeOffsetFromShape::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html#a5198e838e3892245fe7b10884555ec93',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > 
>::get()'],['../structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html#a11bf40abc57580db5ce4b0fd4c3e55ff',1,'cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides.html#af5e46bc2b325cb6952d2d68c8aca1409',1,'cutlass::ComputeOffsetFromStrides::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html#acdbb9c7cdf9fc054656614f72396434e',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html#a512a9d46f6bea9d85641d7263bcfee36',1,'cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides.html#a1744bfe277cbe0c642cce4a48c1dd9ad',1,'cutlass::ComputeThreadOffsetFromStrides::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html#a5d446b2663c01362361e09435a726996',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get()'],['../structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html#a6e621f5fae2ba29277fde46be1cede24',1,'cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a2e7c14b8a118f81c1df46ea5045e297b',1,'cutlass::platform::unique_ptr::get()']]], + ['get_5fcoord_5fdhw',['get_Coord_dhw',['../namespacecutlass.html#a4680709eeeb679ef0219938f85f7394e',1,'cutlass']]], + ['get_5fcoord_5fhw',['get_Coord_hw',['../namespacecutlass.html#a7d2ab683e29b47d245e183ad5aeb962e',1,'cutlass::get_Coord_hw(Coord< 3 > const 
&coord)'],['../namespacecutlass.html#a082e7a2e4acc2879468243f5732ccf0b',1,'cutlass::get_Coord_hw(Coord< 4 > const &coord)']]], + ['get_5fcoord_5fhwc',['get_Coord_hwc',['../namespacecutlass.html#a71f3e2a12b9e98be1fba082610fa9d4f',1,'cutlass']]], + ['get_5fdeleter',['get_deleter',['../classcutlass_1_1platform_1_1unique__ptr.html#a5b8d8ecafb4da336acd50e40cd42b6e0',1,'cutlass::platform::unique_ptr::get_deleter() noexcept'],['../classcutlass_1_1platform_1_1unique__ptr.html#aa427ab4ea4f2336ac6db28d53a4c11ac',1,'cutlass::platform::unique_ptr::get_deleter() const noexcept']]], + ['globalloadstream',['GlobalLoadStream',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a4dd11a75375b6b9d7b8dcbd4d402d8d6',1,'cutlass::gemm::GlobalLoadStream::GlobalLoadStream()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#ab2961b4db0694cf128d55d38a98db575',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::GlobalLoadStream()']]], + ['globalloadstreambase',['GlobalLoadStreamBase',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0fdc0f56d1352b5ad41fd4985edd3278',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['good',['good',['../classcutlass_1_1TensorRef.html#a0c049e523ee0fc98769ed8cd2d026780',1,'cutlass::TensorRef::good()'],['../classcutlass_1_1TensorView.html#a837881bc82704491accf54aad2b9def9',1,'cutlass::TensorView::good()']]] +]; diff --git a/docs/generated-html/search/functions_8.html b/docs/generated-html/search/functions_8.html new file mode 100644 index 0000000000..d59ea97198 --- /dev/null +++ b/docs/generated-html/search/functions_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_8.js b/docs/generated-html/search/functions_8.js new file mode 100644 index 0000000000..9418317cae --- /dev/null +++ b/docs/generated-html/search/functions_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['hgemmswizzle',['HgemmSwizzle',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ac3c52e0fee9b37a3dfc39ca168a63d36',1,'cutlass::gemm::HgemmSwizzle']]] +]; diff --git a/docs/generated-html/search/functions_9.html b/docs/generated-html/search/functions_9.html new file mode 100644 index 0000000000..5ccec42917 --- /dev/null +++ b/docs/generated-html/search/functions_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_9.js b/docs/generated-html/search/functions_9.js new file mode 100644 index 0000000000..04276e19e0 --- /dev/null +++ b/docs/generated-html/search/functions_9.js @@ -0,0 +1,22 @@ +var searchData= +[ + ['identityblockswizzle',['IdentityBlockSwizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#abfde9b316173b1c0b8622cf22ffb6d68',1,'cutlass::gemm::IdentityBlockSwizzle']]], + ['igemmepilogue',['IgemmEpilogue',['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#ab7a51121d24250d6441ee538e6521dc2',1,'cutlass::gemm::IgemmEpilogue::IgemmEpilogue()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a49ac00bed1532707aacd3ff108c84623',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue()']]], + ['igemmfloattoint8converter',['IgemmFloatToInt8Converter',['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#ac65f020e93584b1bd3cdb849ff625026',1,'cutlass::gemm::IgemmFloatToInt8Converter']]], + ['igemmint8tofloatconverter',['IgemmInt8ToFloatConverter',['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a88a55a494d3a30d50477d50bf6a8804d',1,'cutlass::gemm::IgemmInt8ToFloatConverter']]], + ['igemmswizzle',['IgemmSwizzle',['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac041d287c966cf568599d7e462e81d5a',1,'cutlass::gemm::IgemmSwizzle']]], + 
['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a9dea455aa86bb59517b4a4d0309e424b',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_advance()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ab4b8150f19c9f8649d75c69ec0a76e1a',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a174ae7d8aa0664eaf1d6f63c5606baa0',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_advance()'],['../structcutlass_1_1TileLoadIterator.html#a91e13a7aad4b0acac002b6dd125abc37',1,'cutlass::TileLoadIterator::inc_advance()'],['../structcutlass_1_1TileStoreIterator.html#a1614b27755cf82c0e1f3e7852c5a4c75',1,'cutlass::TileStoreIterator::inc_advance()']]], + ['inc_5fc',['inc_c',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a12ead84ea9634e963d10c6df7b7792c9',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_c()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a44287250bf5631a490b514859fd101d1',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_c()']]], + ['inc_5fd',['inc_d',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1e42503e5a54cdc01308e9030aebdd35',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_d()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad26ab8d8010c9a1d7f3b91f60940b460',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab1ebbe54e4315ac07daf260a88f41d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_d()'],['../structcutlass_1_1TileLoadIterator.html#a0a93f37fd366a48c4ed6cc39aa850eb5',1,'cutlass::TileLoadIterator::inc_d()'],['../structcutlass_1_1TileStoreIterator.html#a74dffe1ddcc84935ab170117e939b7e3',1,'cutlass::TileStoreIterator::inc_d()']]], + 
['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aa24336597f4a3316d94df6ab0c20f714',1,'cutlass::gemm::GemmGlobalIteratorAb::inc_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ae07fa10a53d44471a04275145201299e',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa9a733f35e9be67663c9c8f80b0034d4',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_h()'],['../structcutlass_1_1TileLoadIterator.html#a228a95cf2c9c6089287984fcbf5cface',1,'cutlass::TileLoadIterator::inc_h()'],['../structcutlass_1_1TileStoreIterator.html#a3793f5d5846862f22f1de736e36ae7c1',1,'cutlass::TileStoreIterator::inc_h()']]], + ['inc_5fstage',['inc_stage',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#acf22fd09aa537943c16b900d66f1ec6f',1,'cutlass::gemm::SharedLoadStream::inc_stage()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8851150a49e4a9c135279c8c9dfdc592',1,'cutlass::gemm::GemmTraits::SharedLoadStream::inc_stage()'],['../structcutlass_1_1TileLoadIterator.html#aeb3faf5e8f976f5a4d158ceb41a1cc64',1,'cutlass::TileLoadIterator::inc_stage()'],['../structcutlass_1_1TileStoreIterator.html#a187e0852ec4862f6d3cb6249bedc3bb3',1,'cutlass::TileStoreIterator::inc_stage()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a622a4dd27162854ec96efea93cdd4380',1,'cutlass::gemm::GemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aec2d692967d9be5d42673dfde21f5427',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::inc_w()'],['../structcutlass_1_1TileLoadIterator.html#a49cf3ee608debebf451cdd8c2125d073',1,'cutlass::TileLoadIterator::inc_w()'],['../structcutlass_1_1TileStoreIterator.html#aa573a47a9ffc3e07239a09e2bc470cf1',1,'cutlass::TileStoreIterator::inc_w()']]], + 
['initialize',['initialize',['../structcutlass_1_1gemm_1_1Gemm_1_1Params.html#ac00c9d78a187d9c7d53399f971c0e129',1,'cutlass::gemm::Gemm::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a3e9d0fd2989fea776b0cab0e0f2813ce',1,'cutlass::gemm::GemmEpilogueTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a7c7e448384156c801ed362359a1a6a40',1,'cutlass::gemm::GlobalLoadStreamBase::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html#a73091e07b6d4c99f6e0319fbf6bd1709',1,'cutlass::gemm::GemmGlobalIteratorAb::Params::initialize()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#af5a496f1b6a46ea6a9894512029add6a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#adb66103b905b35a1594c6f0bab65758a',1,'cutlass::gemm::SharedLoadStream::Params::initialize()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a8e4d277325bb5e56c718a2298b60d3cf',1,'cutlass::gemm::SharedLoadStream::initialize()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a40023f0ffdd8bee4ccbcaac28222e983',1,'cutlass::gemm::GemmTraits::Params::initialize()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a4946e45e10661307f562b27bad5cb72d',1,'cutlass::gemm::LinearScaling::Params::initialize()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#ad6b65c5f3ed7cd9e7ffeb684cbf30d04',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::initialize()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad2631ffcc963638aa5b016c66a2e2c55',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a3ba93370bd4b2ede4bd4eb97ac0881be',1,'cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, 
Index _stride_w)'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#af496afebb8983e5d346c681334955224',1,'cutlass::TileIteratorBase::Params::initialize()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aeeea0f8bdee876553a4908b9b7cbaf76',1,'cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#afd9e82df76ad35fe883b7834457242b2',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aa3922946bb0da0c0040dec44aa389ec1',1,'cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#aebaecd0f971245ffc5a50fe5f7a9b4e8',1,'cutlass::TileLoadIterator::Params::initialize()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a71f5238a712f7b2f377fb58938ac829b',1,'cutlass::TileStoreIterator::Params::initialize(SharedStorage &storage)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af0d26a2df2a1a5ba3c3169b736bd5d43',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#ac1cfe92f1543ba445fa10f1859a0db98',1,'cutlass::TileStoreIterator::Params::initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#af884f720d36aa82e7f972932686ae986',1,'cutlass::TileStoreIterator::Params::initialize()']]], + 
['initialize_5fpredicates',['initialize_predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab9375d9e779dcda79a5cd561bb3762ff',1,'cutlass::gemm::GemmGlobalIteratorAb::initialize_predicates()'],['../structcutlass_1_1TileIteratorBase.html#a78b6c0d6a1a96dd55a34bc302ecb07d7',1,'cutlass::TileIteratorBase::initialize_predicates()'],['../structcutlass_1_1TileLoadIterator.html#a8291a51bf96f86bc77d0e3453345dbd5',1,'cutlass::TileLoadIterator::initialize_predicates()'],['../structcutlass_1_1TileStoreIterator.html#af92ba20db048a9ec96976a1673f0f7c2',1,'cutlass::TileStoreIterator::initialize_predicates()']]], + ['is_5fzero',['is_zero',['../structcutlass_1_1PredicateVector.html#a1c4fe2bec906cd7937428ed6561ac79a',1,'cutlass::PredicateVector::is_zero()'],['../namespacecutlass_1_1gemm.html#a3e30ae89e6f7501725028144cd2d88cb',1,'cutlass::gemm::is_zero(T x)'],['../namespacecutlass_1_1gemm.html#a4a12fcfae60f26efa47bf0a79483d8ac',1,'cutlass::gemm::is_zero(half x)']]], + ['iterator',['Iterator',['../classcutlass_1_1PredicateVector_1_1Iterator.html#a91b7d25cbd64e696ef23c87671f0b077',1,'cutlass::PredicateVector::Iterator::Iterator(Iterator const &it)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a83c2f584bd061f0b9b6b2a6cddf5b038',1,'cutlass::PredicateVector::Iterator::Iterator(PredicateVector &_vec, int _start=0)']]], + ['iterator_5fload',['iterator_load',['../namespacecutlass.html#a45dd7add04736cb5c3e69991d2f210be',1,'cutlass::iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a50f08aa93d7fe6825599d17e3c977031',1,'cutlass::iterator_load(InputIterator const &_iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#aca491136bdb966638a7ae57c47f86d1e',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, typename InputIterator::Index 
offset=0)'],['../namespacecutlass.html#af25d56f7391322d9a3b9aa3c507f90dc',1,'cutlass::iterator_load(InputIterator const &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fload_5fpost_5fincrement',['iterator_load_post_increment',['../namespacecutlass.html#a3965068d8a4fdfe5e05782930fb4fe6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#af5abe551df7461eab66aa43907063d6b',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, typename InputIterator::Index offset=0)'],['../namespacecutlass.html#afb8e7a4e611e8b5ae7ca19d02f791d37',1,'cutlass::iterator_load_post_increment(InputIterator &iterator, Fragment &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore',['iterator_store',['../namespacecutlass.html#a0cb5bdf7bef498705c51a9cdcbef71f9',1,'cutlass::iterator_store(OutputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#a88dce4b124a294cc123f7cf5fd2d6472',1,'cutlass::iterator_store(OutputIterator const &_iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter predicate_adapter)'],['../namespacecutlass.html#a410ed4d45ccafc2db842967740b6211f',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#ad804b804ac19360b293046f9cbfd8dd5',1,'cutlass::iterator_store(OutputIterator const &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]], + ['iterator_5fstore_5fpost_5fincrement',['iterator_store_post_increment',['../namespacecutlass.html#a5bf15cbf4cf4649d895fcbc2edf6a2de',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset, ConstPredicateAdapter 
predicate_adapter)'],['../namespacecutlass.html#ab8efb0edefca7a59acc5a14b7311130c',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, typename OutputIterator::Index offset=0)'],['../namespacecutlass.html#a96fdb65e922f6a3d46aa5de9ea78d460',1,'cutlass::iterator_store_post_increment(OutputIterator &iterator, Fragment const &fragment, ConstPredicateAdapter pred_it)']]] +]; diff --git a/docs/generated-html/search/functions_a.html b/docs/generated-html/search/functions_a.html new file mode 100644 index 0000000000..3958eb7b9a --- /dev/null +++ b/docs/generated-html/search/functions_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_a.js b/docs/generated-html/search/functions_a.js new file mode 100644 index 0000000000..61f3d1a74a --- /dev/null +++ b/docs/generated-html/search/functions_a.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['launch',['launch',['../structcutlass_1_1gemm_1_1Gemm.html#a77ae137aec79b4061a9ffa09aabf641c',1,'cutlass::gemm::Gemm::launch(Params const &params, cudaStream_t stream=cudaStreamDefault)'],['../structcutlass_1_1gemm_1_1Gemm.html#a4f4122a2ae8b9b09a9660e5c2ca9e906',1,'cutlass::gemm::Gemm::launch(CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)']]], + ['lcm',['lcm',['../namespacecutlass.html#af07506fee11de882d926f4e8237eef09',1,'cutlass']]], + ['leading_5fdim',['leading_dim',['../classcutlass_1_1TensorRef.html#a8e1c61910ffb49ec64930f66dd342b77',1,'cutlass::TensorRef']]], + ['linearscaling',['LinearScaling',['../structcutlass_1_1gemm_1_1LinearScaling.html#a34df6970f033b3090ad8f4d40063b1b2',1,'cutlass::gemm::LinearScaling']]], + ['load',['load',['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a01a847858cb330d7d109ddee228e96ce',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a014682b143bce65667075ea15fad184d',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load()'],['../structcutlass_1_1Load.html#ad033ebc1452d96b18913333bf7068140',1,'cutlass::Load::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#aa9d5e227ea20ad3c6952f296016ec167',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 
>::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a0e58d26dd68aabb6cb9678f5656c7e6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a7ba77016bee8e941f7831cc9fbfa994d',1,'cutlass::Load< double, 2, Memory_, true, 16 >::load()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a4ee00178c441bdf4d4a1f8cf984bc03f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load()'],['../structcutlass_1_1TileLoadIterator.html#a9c4b332857f419e6f789a93404dc2140',1,'cutlass::TileLoadIterator::load(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileLoadIterator.html#a1058cdec33393db9c16b28c21d8957db',1,'cutlass::TileLoadIterator::load(Fragment &fragment) const']]], + ['load_5fpost_5fincrement',['load_post_increment',['../structcutlass_1_1TileLoadIterator.html#a2716b9010d2902b90e63abb0531ee915',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileLoadIterator.html#a195993d58ae0eeb53203116ac02ab38d',1,'cutlass::TileLoadIterator::load_post_increment(Fragment &fragment)']]] +]; diff --git a/docs/generated-html/search/functions_b.html b/docs/generated-html/search/functions_b.html new file mode 100644 index 0000000000..b99b702d28 --- /dev/null +++ b/docs/generated-html/search/functions_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_b.js b/docs/generated-html/search/functions_b.js new file mode 100644 index 0000000000..7b4b2eb9a3 --- /dev/null +++ b/docs/generated-html/search/functions_b.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['make_5fcoord',['make_Coord',['../namespacecutlass.html#a7419519fa453a121dfa5f26bf87318d9',1,'cutlass::make_Coord(int _0)'],['../namespacecutlass.html#a61d81e5363bcb8a7f6dd70f053242564',1,'cutlass::make_Coord(int _0, int _1)'],['../namespacecutlass.html#a25acf680a7d2592c957a7ac603f4c361',1,'cutlass::make_Coord(int _0, int _1, int _2)'],['../namespacecutlass.html#a9410b1f5956d3aaf4584e65d047428fc',1,'cutlass::make_Coord(int _0, int _1, int _2, int _3)']]], + ['make_5fpair',['make_pair',['../namespacecutlass_1_1platform.html#a90ce74c7faa4e27c888ce56e957b73d5',1,'cutlass::platform']]], + ['make_5fzero',['make_zero',['../namespacecutlass.html#acdb62db582cf90cfd437fc56f4ca7bbf',1,'cutlass::make_zero(Scalar_ &x)'],['../namespacecutlass.html#abc5c00b4986db5a114e774cee9999717',1,'cutlass::make_zero(Vector< Scalar_, kLanes_ > &vec)']]], + ['max',['max',['../namespacecutlass_1_1platform.html#af6a9a165e53d7e85ae121d5789aa03e0',1,'cutlass::platform']]], + ['min',['min',['../namespacecutlass_1_1platform.html#a57c071d2a7305dd4ec60542e66b0c81c',1,'cutlass::platform']]], + ['multiply',['multiply',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a522301fbe3e276cb5ef9fbe75bb2ab50',1,'cutlass::gemm::FragmentMultiplyAdd::multiply()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ae62d61ec068ac958753d0a2f5a99d8e2',1,'cutlass::gemm::FragmentMultiplyAdd< half >::multiply()']]], + ['multiply_5fadd',['multiply_add',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a34bbf209967fef6181d3d46dd27fa0c0',1,'cutlass::gemm::FragmentMultiplyAdd::multiply_add()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a89c8b663af69f13c2a02cb464b5172a5',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::multiply_add()'],['../structcutlass_1_1gemm_1_1Gemm.html#a2e844037d2527b842de3590cb783a49f',1,'cutlass::gemm::Gemm::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a66486d38349fa20eb065ae9542eb43aa',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad22dd143c304c22c2630aedbfd3459af',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5dcf66c8126ec8adf8e66d4bf5b2f347',1,'cutlass::gemm::ThreadMultiplyAdd::multiply_add()']]] +]; diff --git a/docs/generated-html/search/functions_c.html b/docs/generated-html/search/functions_c.html new file mode 100644 index 0000000000..3a33d874d6 --- /dev/null +++ b/docs/generated-html/search/functions_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_c.js b/docs/generated-html/search/functions_c.js new file mode 100644 index 0000000000..14ca492a38 --- /dev/null +++ b/docs/generated-html/search/functions_c.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['offset',['offset',['../classcutlass_1_1TensorRef.html#a02ee5d16ed4ce4705a99bb16b2ae1ae8',1,'cutlass::TensorRef::offset()'],['../classcutlass_1_1TensorView.html#a064f3630e69798e7915f910c4ee99ab7',1,'cutlass::TensorView::offset()']]], + ['operator_20_26_3d',['operator &=',['../structcutlass_1_1PredicateVector.html#a3dd9aeba8f3cbe7a8198d68d91a0bbb9',1,'cutlass::PredicateVector']]], + ['operator_20b_2a',['operator B*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8d100273203db9018dffbbe84e0b6c76',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20bool',['operator bool',['../classcutlass_1_1platform_1_1unique__ptr.html#a5791650488ae864f10ad04bec4a31005',1,'cutlass::platform::unique_ptr']]], + ['operator_20d_2a',['operator D*',['../structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html#a8aadc500baf1492b1a4d05cc8b35fc13',1,'cutlass::platform::is_base_of_helper::dummy']]], + ['operator_20value_5ftype',['operator value_type',['../structcutlass_1_1platform_1_1integral__constant.html#a55d25116387f1c6d978462b1d245d675',1,'cutlass::platform::integral_constant']]], + ['operator_21_3d',['operator!=',['../structcutlass_1_1Coord.html#a7fb46873e8f3cf38212703d35bd36995',1,'cutlass::Coord::operator!=()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a3d06715a77740034697686a7977cb685',1,'cutlass::PredicateVector::ConstIterator::operator!=()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a08cb4d1395b88a4451fbb1a27e010887',1,'cutlass::PredicateVector::Iterator::operator!=()'],['../namespacecutlass_1_1platform.html#a248f49adf09654d2cd04bd2760ab2566',1,'cutlass::platform::operator!=()']]], + 
['operator_28_29',['operator()',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html#ab8adb983c0573a0015469f40a75287be',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html#abaf0d4459a64b3e9533758b59600bd52',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html#a1e357fe5bc1daef333e6be776a21a2ca',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html#a4e35f0b2ca63a6b981230b73f843f726',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html#a51a325b435b9a53effaa003b3670e410',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html#a5b4a635a521364357386259b0f84c0ba',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html#a4f9cca16303ac9ae29a0eaa11dcc23b6',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html#ace1b936cab289c6884e673312283d422',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html#a9fc1ca09733113f80fe5fe45db3d9b81',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html#a1228edf6cc0f81af520dc77c8792b94c',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadOffset::operator()()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset
.html#ad7537f8b30ee6913cf4afa1d3c054e68',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::ThreadOffset::operator()()'],['../structcutlass_1_1TiledThreadOffset.html#a7290b6ca9ef0bede634f69bd05450fa2',1,'cutlass::TiledThreadOffset::operator()()'],['../structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html#a0e4edffb19218ccbf77995f6d20df000',1,'cutlass::TileTraitsWarpRake::ThreadOffset::operator()()'],['../structcutlass_1_1platform_1_1plus.html#a3bf1e5147df4287bf58ad8f11ea0d98c',1,'cutlass::platform::plus::operator()()'],['../structcutlass_1_1platform_1_1less.html#adfb49ee70a700a8483c70b4b353f6bc5',1,'cutlass::platform::less::operator()()'],['../structcutlass_1_1platform_1_1greater.html#a8d56cf343dd33acebe19d0b51abe3978',1,'cutlass::platform::greater::operator()()'],['../structcutlass_1_1platform_1_1integral__constant.html#a5271a533526a535ae8b783c736252f18',1,'cutlass::platform::integral_constant::operator()()'],['../structcutlass_1_1platform_1_1default__delete.html#a59e6e3cc95685ac34fa6f9cf301b3a15',1,'cutlass::platform::default_delete::operator()()'],['../structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html#a16c5595a5aec7d7ee34e38bef4a66c87',1,'cutlass::platform::default_delete< T[]>::operator()()']]], + ['operator_2a',['operator*',['../structcutlass_1_1Coord.html#a8e4f7df55a75d040cf50cf9984c04c8a',1,'cutlass::Coord::operator*()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#abbc2bceb6cf8d7f168b8a00eb48c0946',1,'cutlass::PredicateVector::ConstIterator::operator*()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a049b568e0f5de011ee76ce79bcedbab4',1,'cutlass::PredicateVector::Iterator::operator*()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#a78016158f99dd87e822a2a2cbd4cec78',1,'cutlass::PredicateVector::TrivialIterator::operator*()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a45a3cb6d8641a6130991d56e84cbb38b',1,'cutlass::platform::unique_ptr::operator*()']]], + 
['operator_2a_3d',['operator*=',['../structcutlass_1_1Coord.html#a282b6cc9ac8b2f72720c252791155aad',1,'cutlass::Coord']]], + ['operator_2b',['operator+',['../structcutlass_1_1Coord.html#a3dfc4ce4191097b6c3268696f2a45ef5',1,'cutlass::Coord::operator+()'],['../classcutlass_1_1TensorRef.html#aa7b80d225c01c9dc12aafc515cf15842',1,'cutlass::TensorRef::operator+()']]], + ['operator_2b_2b',['operator++',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a10ee4bb2f206432aa5ee1a83cb046b70',1,'cutlass::PredicateVector::ConstIterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a977a99af3166a58d5bc5a613a1abe7d5',1,'cutlass::PredicateVector::ConstIterator::operator++(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a7dddc0a6b5c958156beef29bedfd1bd3',1,'cutlass::PredicateVector::Iterator::operator++()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a6c7333ad14d545cafc707e78752bf1e3',1,'cutlass::PredicateVector::Iterator::operator++(int)'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#ad24e9b451064e99fb19955f772c30e6a',1,'cutlass::PredicateVector::TrivialIterator::operator++()'],['../structcutlass_1_1PredicateVector_1_1TrivialIterator.html#aa35b9165920b83b9a5a888df83925051',1,'cutlass::PredicateVector::TrivialIterator::operator++(int)']]], + ['operator_2b_3d',['operator+=',['../structcutlass_1_1Coord.html#aeb209486943fa9d42911325b16e49e09',1,'cutlass::Coord']]], + ['operator_2d',['operator-',['../structcutlass_1_1Coord.html#acc510511ffb52bed7f6a52f14b99750d',1,'cutlass::Coord::operator-()'],['../classcutlass_1_1TensorRef.html#a3843ccfd1d097f25eff45dc159709938',1,'cutlass::TensorRef::operator-()']]], + 
['operator_2d_2d',['operator--',['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2763012a9284e97650b14e20c5668286',1,'cutlass::PredicateVector::ConstIterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#a2910a714d34a688b8ea560ea2933436b',1,'cutlass::PredicateVector::ConstIterator::operator--(int)'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a69fb5b24eeb43331b7401768e8584e61',1,'cutlass::PredicateVector::Iterator::operator--()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#aad709a11f43b84c88e3ce3a0394f8e8a',1,'cutlass::PredicateVector::Iterator::operator--(int)']]], + ['operator_2d_3d',['operator-=',['../structcutlass_1_1Coord.html#ac1795ec2a5890d8a39840567a4bea88e',1,'cutlass::Coord']]], + ['operator_2d_3e',['operator->',['../classcutlass_1_1platform_1_1unique__ptr.html#afa52edcaef23461ce1f9c1dac349c24b',1,'cutlass::platform::unique_ptr']]], + ['operator_2f',['operator/',['../structcutlass_1_1Coord.html#a87f485be079fa68bcf576da4d56f0ece',1,'cutlass::Coord']]], + ['operator_2f_3d',['operator/=',['../structcutlass_1_1Coord.html#abe91e59962ef0d73aec9c14824f64ecc',1,'cutlass::Coord']]], + ['operator_3c',['operator<',['../namespacecutlass_1_1platform.html#a412dbdbc678ecd12b55fcad4ef4155bd',1,'cutlass::platform']]], + ['operator_3c_3c',['operator<<',['../core__io_8h.html#a4a0d84a2a19a11549b87a2328d58690d',1,'core_io.h']]], + ['operator_3c_3d',['operator<=',['../namespacecutlass_1_1platform.html#a41d573133357bd555f78d33afc1152d3',1,'cutlass::platform']]], + ['operator_3d',['operator=',['../classcutlass_1_1TensorView.html#aa9e9e19f35ce3111f64b763ca49b51ef',1,'cutlass::TensorView']]], + 
['operator_3d_3d',['operator==',['../structcutlass_1_1Coord.html#acfa94aabd0c9a71ee994ca479d5f515f',1,'cutlass::Coord::operator==()'],['../classcutlass_1_1PredicateVector_1_1ConstIterator.html#aa2d03d88ac23051803d010f78157c357',1,'cutlass::PredicateVector::ConstIterator::operator==()'],['../classcutlass_1_1PredicateVector_1_1Iterator.html#a5c5266fcef67c7b263682c4bc4a5000e',1,'cutlass::PredicateVector::Iterator::operator==()'],['../namespacecutlass_1_1platform.html#ab9b8306ae9dc21fa646c49b68fa8e197',1,'cutlass::platform::operator==()']]], + ['operator_3e',['operator>',['../namespacecutlass_1_1platform.html#a9e8e698d40b8df881991fde9ba2a1b12',1,'cutlass::platform']]], + ['operator_3e_3d',['operator>=',['../namespacecutlass_1_1platform.html#ab0f21e67c0a4b5c6952042b502c6816f',1,'cutlass::platform']]], + ['operator_5b_5d',['operator[]',['../structcutlass_1_1Coord.html#ab7fc89de3ccd7096ab275fb5dd40104c',1,'cutlass::Coord::operator[](int dim)'],['../structcutlass_1_1Coord.html#a6eeab0a1686ee25389e1bd017c5f03ae',1,'cutlass::Coord::operator[](int dim) const'],['../structcutlass_1_1Fragment.html#a99fef5f3093b2df50905ab13819b67a0',1,'cutlass::Fragment::operator[](int i)'],['../structcutlass_1_1Fragment.html#a75f51bb6ca84615076aab42ac9d42592',1,'cutlass::Fragment::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a83bb6a3ed588e2d890bf986665d2b7bb',1,'cutlass::FragmentIterator::operator[](int i) const'],['../structcutlass_1_1FragmentIterator.html#a3bd2a9d8467f8db02ca3a01ae0c11ad7',1,'cutlass::FragmentIterator::operator[](int i)'],['../structcutlass_1_1FragmentConstIterator.html#af16f2aa14ff424b038a393b683c4783e',1,'cutlass::FragmentConstIterator::operator[]()'],['../structcutlass_1_1PredicateVector.html#a840985438ac8306ec680eb20edd4e5c5',1,'cutlass::PredicateVector::operator[]()'],['../classcutlass_1_1TensorRef.html#a6a2aa88ed77557c089a165da0df1e974',1,'cutlass::TensorRef::operator[](Coord< Rank > const &coord) 
const'],['../classcutlass_1_1TensorRef.html#a34e97ab2190b4681d1c1199186d66f1c',1,'cutlass::TensorRef::operator[](int idx) const'],['../classcutlass_1_1TensorView.html#a7fe7e44e15fd1ac58fb55edf72e8fb23',1,'cutlass::TensorView::operator[]()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a5c7a204af07a7d325b0a8303e199a50d',1,'cutlass::platform::unique_ptr::operator[]()'],['../unioncutlass_1_1Vector.html#a250860c921c94a6077344f9e11bf5b02',1,'cutlass::Vector::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector.html#a44cc27bf8a7b789b4ae8538155a50156',1,'cutlass::Vector::operator[](uint32_t i)'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab0516cef8949f5998b5251cc6b6db683',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i) const'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a8ade80e040264fbd669d3f15c249884e',1,'cutlass::Vector< half, kLanes_ >::operator[](uint32_t i)']]], + ['operator_7c_3d',['operator|=',['../structcutlass_1_1PredicateVector.html#aab9de134132c62de1c062ca57582cdbc',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/functions_d.html b/docs/generated-html/search/functions_d.html new file mode 100644 index 0000000000..31b75b8890 --- /dev/null +++ b/docs/generated-html/search/functions_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_d.js b/docs/generated-html/search/functions_d.js new file mode 100644 index 0000000000..8aefe3b0fd --- /dev/null +++ b/docs/generated-html/search/functions_d.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['predicatetileadapter',['PredicateTileAdapter',['../structcutlass_1_1PredicateTileAdapter.html#a4c9eb6c6498ccf117427a3b35f7ce5ea',1,'cutlass::PredicateTileAdapter']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1PredicateVector.html#aec1201df19c0ed0516810a3f19353c21',1,'cutlass::PredicateVector']]], + ['project',['project',['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html#ae91b2350374f1734a30cbed45e14b8e3',1,'cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html#a0f1579013f56fe16ebc147271f163c3c',1,'cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html#af2a323461334a6b55b95074a1973d250',1,'cutlass::gemm::ProjectOperand< GemmOperand::kC, true >::project()'],['../structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html#ace04040ccb13af5f9a283ca80ffe93d1',1,'cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project()']]] +]; diff --git a/docs/generated-html/search/functions_e.html b/docs/generated-html/search/functions_e.html new file mode 100644 index 0000000000..cddb9bb586 --- /dev/null +++ b/docs/generated-html/search/functions_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_e.js b/docs/generated-html/search/functions_e.js new file mode 100644 index 0000000000..8bc9b11121 --- /dev/null +++ b/docs/generated-html/search/functions_e.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['ref',['ref',['../classcutlass_1_1TensorView.html#a8650860460ea24944c803a671095be09',1,'cutlass::TensorView::ref()'],['../classcutlass_1_1TensorView.html#a5cbff89d3d8dc71d27a4d6c1d7abb58a',1,'cutlass::TensorView::ref() const']]], + ['release',['release',['../classcutlass_1_1platform_1_1unique__ptr.html#a7ac06ebe7bc66573d3225891e12d2279',1,'cutlass::platform::unique_ptr']]], + ['reset',['reset',['../classcutlass_1_1TensorRef.html#abefe392e81da2c09cb127f963ae90674',1,'cutlass::TensorRef::reset()'],['../classcutlass_1_1TensorView.html#a8b1785a1ea5d7aa7eba8e45297d539d3',1,'cutlass::TensorView::reset()'],['../classcutlass_1_1platform_1_1unique__ptr.html#a6740f71511f5495d6038cf8878862331',1,'cutlass::platform::unique_ptr::reset()']]], + ['residue',['residue',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aae1adef6312e069e59a83d38c03116f9',1,'cutlass::gemm::GlobalLoadStreamBase::residue()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#aab37ea6c47e34466371314ed3971dc7b',1,'cutlass::gemm::GemmGlobalIteratorAb::residue()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a405b93680bb6e356369863244d0b56aa',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::residue()']]], + ['round_5fnearest',['round_nearest',['../namespacecutlass.html#a17c8c408d672d26f1c70d2435f6ac83e',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/functions_f.html b/docs/generated-html/search/functions_f.html new file mode 100644 index 0000000000..4967292615 --- /dev/null +++ b/docs/generated-html/search/functions_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/functions_f.js b/docs/generated-html/search/functions_f.js new file mode 100644 index 0000000000..183adc887b --- /dev/null +++ b/docs/generated-html/search/functions_f.js @@ -0,0 +1,16 @@ +var searchData= +[ + ['set',['set',['../classcutlass_1_1PredicateVector_1_1Iterator.html#aadfd039b5622098c9e46706a27122575',1,'cutlass::PredicateVector::Iterator::set()'],['../structcutlass_1_1PredicateVector.html#a062fa8a8df725ef08ced2ffcca8336af',1,'cutlass::PredicateVector::set()'],['../structcutlass_1_1PredicateTileAdapter.html#aeda47efdda0387f9c3c7b31f836afca5',1,'cutlass::PredicateTileAdapter::set()']]], + ['shared_5fiterator_5fload',['shared_iterator_load',['../namespacecutlass.html#abcec976c59cab75ca55b338d125154a3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment)'],['../namespacecutlass.html#aa9416026c6db08d92a34c2ac08fea8c3',1,'cutlass::shared_iterator_load(InputIterator &iterator, Fragment &fragment, int d)']]], + ['shared_5fiterator_5fstore',['shared_iterator_store',['../namespacecutlass.html#a705c6d75513e112d2731d1c40f4cf109',1,'cutlass']]], + ['shared_5fload_5ffence',['shared_load_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9b5e42f222fec98ff479bc1650221b84',1,'cutlass::gemm::GemmEpilogue::shared_load_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a475463c1e3af71598e22da8956900ebe',1,'cutlass::gemm::GemmTraits::shared_load_fence()']]], + ['shared_5fstore_5ffence',['shared_store_fence',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac1b2a16b4ccf3e9617faf4d8a2c43691',1,'cutlass::gemm::GemmEpilogue::shared_store_fence()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ac3c840a3d90c0da43301761af83c2c9f',1,'cutlass::gemm::GemmTraits::shared_store_fence()']]], + 
['sharedloadstream',['SharedLoadStream',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6e097738679436d580e8dc6ac70efaad',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a93e9bcdca4ceb68754fb1f73e2b25d25',1,'cutlass::gemm::SharedLoadStream::SharedLoadStream(Params const &params, SharedStorage &shared_storage)'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a49315aea1c54d84ff19b0ac215128b95',1,'cutlass::gemm::GemmTraits::SharedLoadStream::SharedLoadStream()']]], + ['size',['size',['../classcutlass_1_1TensorView.html#a541a7c22e7109d4059044f146fe69027',1,'cutlass::TensorView::size() const'],['../classcutlass_1_1TensorView.html#a6218d8555679966eab784a6bb1fa4ed1',1,'cutlass::TensorView::size(int dim) const']]], + ['store',['store',['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#a118c78aa6b0ae0f0c78889689b6878c8',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a45319520b7d341c66bd54d3e8fec48f8',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store()'],['../structcutlass_1_1Store.html#a1117fa7b7bdeeb3a7f2d647a1d340aaf',1,'cutlass::Store::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a00f6bb93d318bf4cff35c9dabc630167',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a027980b8456243974b0c442866a66e3a',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 
>::store()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ab70d04589637f285f861902f649f834e',1,'cutlass::Store< double, 2, Memory_, true, 16 >::store()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aa130564bb2eba7b07e1f183c98f1d9e2',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::store()'],['../structcutlass_1_1TileStoreIterator.html#a53820de506cecb1f5fb07b3385d8272a',1,'cutlass::TileStoreIterator::store(Fragment &fragment, PredicateIterator pred_it) const'],['../structcutlass_1_1TileStoreIterator.html#a60258b7c1a1708f97e28f8f6c292bfe4',1,'cutlass::TileStoreIterator::store(Fragment &fragment) const']]], + ['store_5fpost_5fincrement',['store_post_increment',['../structcutlass_1_1TileStoreIterator.html#a57aa2c36eb6ad9d2500c1f5396b3a526',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment, PredicateIterator pred_it)'],['../structcutlass_1_1TileStoreIterator.html#ae63949f58c1b32959bbfa5b64d521f0f',1,'cutlass::TileStoreIterator::store_post_increment(Fragment &fragment)']]], + ['stride',['stride',['../classcutlass_1_1TensorRef.html#a89380141d25528c4c7ba6c365b96a878',1,'cutlass::TensorRef::stride() const'],['../classcutlass_1_1TensorRef.html#af47f192552544272774a29d7a0829a31',1,'cutlass::TensorRef::stride(int dim) const'],['../classcutlass_1_1TensorView.html#a3ac125a25199fd91f73d2cfe9fc3d09b',1,'cutlass::TensorView::stride() const'],['../classcutlass_1_1TensorView.html#a522630bb0df977282a9bff17e6fee843',1,'cutlass::TensorView::stride(int dim) const']]], + ['subview',['subview',['../classcutlass_1_1TensorView.html#aee43c516397d7c06eb8012711d8d7c15',1,'cutlass::TensorView']]], + ['swap',['swap',['../classcutlass_1_1platform_1_1unique__ptr.html#a748d413c50bdbbe9e2f9986fbc423036',1,'cutlass::platform::unique_ptr::swap()'],['../namespacecutlass_1_1platform.html#a3e83320a39137d92042eb0bf93be9678',1,'cutlass::platform::swap()']]], + 
['swizzle',['swizzle',['../structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html#a0a366c072ee66bbcb390acd7b8bbe5f8',1,'cutlass::gemm::IdentityBlockSwizzle']]] +]; diff --git a/docs/generated-html/search/groups_0.html b/docs/generated-html/search/groups_0.html new file mode 100644 index 0000000000..f4895cb403 --- /dev/null +++ b/docs/generated-html/search/groups_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_0.js b/docs/generated-html/search/groups_0.js new file mode 100644 index 0000000000..1b24cdfd0c --- /dev/null +++ b/docs/generated-html/search/groups_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['fragment_20concept',['Fragment Concept',['../group__fragment__concept.html',1,'']]], + ['fragment_20iterator_20concept',['Fragment Iterator Concept',['../group__fragment__iterator__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_1.html b/docs/generated-html/search/groups_1.html new file mode 100644 index 0000000000..31952659a7 --- /dev/null +++ b/docs/generated-html/search/groups_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_1.js b/docs/generated-html/search/groups_1.js new file mode 100644 index 0000000000..5ebe29b052 --- /dev/null +++ b/docs/generated-html/search/groups_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['layout_20concept',['Layout Concept',['../group__layout__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_2.html b/docs/generated-html/search/groups_2.html new file mode 100644 index 0000000000..58824467c3 --- /dev/null +++ b/docs/generated-html/search/groups_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_2.js b/docs/generated-html/search/groups_2.js new file mode 100644 index 0000000000..b7fc0e347b --- /dev/null +++ b/docs/generated-html/search/groups_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['predicate_20iterator_20concept',['Predicate Iterator Concept',['../group__predicate__iterator__concept.html',1,'']]], + ['predicate_20tile_20adapter_20concept',['Predicate Tile Adapter Concept',['../group__predicate__tile__adapter.html',1,'']]], + ['predicate_20vector_20concept',['Predicate Vector Concept',['../group__predicate__vector__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/groups_3.html b/docs/generated-html/search/groups_3.html new file mode 100644 index 0000000000..bd23aa6e12 --- /dev/null +++ b/docs/generated-html/search/groups_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/groups_3.js b/docs/generated-html/search/groups_3.js new file mode 100644 index 0000000000..e48e674fad --- /dev/null +++ b/docs/generated-html/search/groups_3.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['tile_20load_20iterator_20concept',['Tile Load Iterator Concept',['../group__tile__load__iterator__concept.html',1,'']]], + ['tile_20store_20iterator_20concept',['Tile Store Iterator Concept',['../group__tile__store__iterator__concept.html',1,'']]], + ['tile_20traits_20concept',['Tile Traits Concept',['../group__tile__traits__concept.html',1,'']]] +]; diff --git a/docs/generated-html/search/mag_sel.png b/docs/generated-html/search/mag_sel.png new file mode 100644 index 0000000000000000000000000000000000000000..81f6040a2092402b4d98f9ffa8855d12a0d4ca17 GIT binary patch literal 563 zcmV-30?hr1P)zxx&tqG15pu7)IiiXFflOc2k;dXd>%13GZAy? zRz!q0=|E6a6vV)&ZBS~G9oe0kbqyw1*gvY`{Pop2oKq#FlzgXt@Xh-7fxh>}`Fxg> z$%N%{$!4=5nM{(;=c!aG1Ofr^Do{u%Ih{^&Fc@H2)+a-?TBXrw5DW&z%Nb6mQ!L9O zl}b@6mB?f=tX3;#vl)}ggh(Vpyh(IK z(Mb0D{l{U$FsRjP;!{($+bsaaVi8T#1c0V#qEIOCYa9@UVLV`f__E81L;?WEaRA;Y zUH;rZ;vb;mk7JX|$=i3O~&If0O@oZfLg8gfIjW=dcBsz;gI=!{-r4# z4%6v$&~;q^j7Fo67yJ(NJWuX+I~I!tj^nW3?}^9bq|<3^+vapS5sgM^x7!cs(+mMT z&y%j};&~po+YO)3hoUH4E*E;e9>?R6SS&`X)p`njycAVcg{rEb41T{~Hk(bl-7eSb zmFxA2uIqo#@R?lKm50ND`~6Nfn|-b1|L6O98vt3Tx@gKz#isxO002ovPDHLkV1kyW B_l^Jn literal 0 HcmV?d00001 diff --git a/docs/generated-html/search/namespaces_0.html b/docs/generated-html/search/namespaces_0.html new file mode 100644 index 0000000000..f55ca63aa2 --- /dev/null +++ b/docs/generated-html/search/namespaces_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/namespaces_0.js b/docs/generated-html/search/namespaces_0.js new file mode 100644 index 0000000000..538984567e --- /dev/null +++ b/docs/generated-html/search/namespaces_0.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['cutlass',['cutlass',['../namespacecutlass.html',1,'']]], + ['gemm',['gemm',['../namespacecutlass_1_1gemm.html',1,'cutlass']]], + ['platform',['platform',['../namespacecutlass_1_1platform.html',1,'cutlass']]] +]; diff --git a/docs/generated-html/search/namespaces_1.html b/docs/generated-html/search/namespaces_1.html new file mode 100644 index 0000000000..37c816cc45 --- /dev/null +++ b/docs/generated-html/search/namespaces_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/namespaces_1.js b/docs/generated-html/search/namespaces_1.js new file mode 100644 index 0000000000..5f15704219 --- /dev/null +++ b/docs/generated-html/search/namespaces_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['nv_5fstd',['nv_std',['../namespacenv__std.html',1,'']]] +]; diff --git a/docs/generated-html/search/nomatches.html b/docs/generated-html/search/nomatches.html new file mode 100644 index 0000000000..b1ded27e9a --- /dev/null +++ b/docs/generated-html/search/nomatches.html @@ -0,0 +1,12 @@ + + + + + + + +
    +
    No Matches
    +
    + + diff --git a/docs/generated-html/search/search.css b/docs/generated-html/search/search.css new file mode 100644 index 0000000000..53d15fed32 --- /dev/null +++ b/docs/generated-html/search/search.css @@ -0,0 +1,271 @@ +/*---------------- Search Box */ + +#FSearchBox { + float: left; +} + +#MSearchBox { + white-space : nowrap; + float: none; + margin-top: 8px; + right: 0px; + width: 170px; + height: 24px; + z-index: 102; +} + +#MSearchBox .left +{ + display:block; + position:absolute; + left:10px; + width:20px; + height:19px; + background:url('search_l.png') no-repeat; + background-position:right; +} + +#MSearchSelect { + display:block; + position:absolute; + width:20px; + height:19px; +} + +.left #MSearchSelect { + left:4px; +} + +.right #MSearchSelect { + right:5px; +} + +#MSearchField { + display:block; + position:absolute; + height:19px; + background:url('search_m.png') repeat-x; + border:none; + width:115px; + margin-left:20px; + padding-left:4px; + color: #909090; + outline: none; + font: 9pt Arial, Verdana, sans-serif; + -webkit-border-radius: 0px; +} + +#FSearchBox #MSearchField { + margin-left:15px; +} + +#MSearchBox .right { + display:block; + position:absolute; + right:10px; + top:8px; + width:20px; + height:19px; + background:url('search_r.png') no-repeat; + background-position:left; +} + +#MSearchClose { + display: none; + position: absolute; + top: 4px; + background : none; + border: none; + margin: 0px 4px 0px 0px; + padding: 0px 0px; + outline: none; +} + +.left #MSearchClose { + left: 6px; +} + +.right #MSearchClose { + right: 2px; +} + +.MSearchBoxActive #MSearchField { + color: #000000; +} + +/*---------------- Search filter selection */ + +#MSearchSelectWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #B7CE90; + background-color: #FBFCF9; + z-index: 10001; + padding-top: 4px; + padding-bottom: 4px; + -moz-border-radius: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; 
+ -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +.SelectItem { + font: 8pt Arial, Verdana, sans-serif; + padding-left: 2px; + padding-right: 12px; + border: 0px; +} + +span.SelectionMark { + margin-right: 4px; + font-family: monospace; + outline-style: none; + text-decoration: none; +} + +a.SelectItem { + display: block; + outline-style: none; + color: #000000; + text-decoration: none; + padding-left: 6px; + padding-right: 12px; +} + +a.SelectItem:focus, +a.SelectItem:active { + color: #000000; + outline-style: none; + text-decoration: none; +} + +a.SelectItem:hover { + color: #FFFFFF; + background-color: #6F8C3D; + outline-style: none; + text-decoration: none; + cursor: pointer; + display: block; +} + +/*---------------- Search results window */ + +iframe#MSearchResults { + width: 60ex; + height: 15em; +} + +#MSearchResultsWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #000; + background-color: #F4F7EE; + z-index:10000; +} + +/* ----------------------------------- */ + + +#SRIndex { + clear:both; + padding-bottom: 15px; +} + +.SREntry { + font-size: 10pt; + padding-left: 1ex; +} + +.SRPage .SREntry { + font-size: 8pt; + padding: 1px 5px; +} + +body.SRPage { + margin: 5px 2px; +} + +.SRChildren { + padding-left: 3ex; padding-bottom: .5em +} + +.SRPage .SRChildren { + display: none; +} + +.SRSymbol { + font-weight: bold; + color: #789742; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRScope { + display: block; + color: #789742; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRSymbol:focus, a.SRSymbol:active, +a.SRScope:focus, a.SRScope:active { + text-decoration: underline; +} + +span.SRScope { + padding-left: 4px; +} + +.SRPage .SRStatus { + padding: 2px 5px; + font-size: 8pt; + font-style: italic; +} + +.SRResult { + display: none; +} + 
+DIV.searchresults { + margin-left: 10px; + margin-right: 10px; +} + +/*---------------- External search page results */ + +.searchresult { + background-color: #F5F8F0; +} + +.pages b { + color: white; + padding: 5px 5px 3px 5px; + background-image: url("../tab_a.png"); + background-repeat: repeat-x; + text-shadow: 0 1px 1px #000000; +} + +.pages { + line-height: 17px; + margin-left: 4px; + text-decoration: none; +} + +.hl { + font-weight: bold; +} + +#searchresults { + margin-bottom: 20px; +} + +.searchpages { + margin-top: 10px; +} + diff --git a/docs/generated-html/search/search.js b/docs/generated-html/search/search.js new file mode 100644 index 0000000000..a554ab9cb5 --- /dev/null +++ b/docs/generated-html/search/search.js @@ -0,0 +1,814 @@ +/* + @licstart The following is the entire license notice for the + JavaScript code in this file. + + Copyright (C) 1997-2017 by Dimitri van Heesch + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + @licend The above is the entire license notice + for the JavaScript code in this file + */ +function convertToId(search) +{ + var result = ''; + for (i=0;i do a search + { + this.Search(); + } + } + + this.OnSearchSelectKey = function(evt) + { + var e = (evt) ? 
evt : window.event; // for IE + if (e.keyCode==40 && this.searchIndex0) // Up + { + this.searchIndex--; + this.OnSelectItem(this.searchIndex); + } + else if (e.keyCode==13 || e.keyCode==27) + { + this.OnSelectItem(this.searchIndex); + this.CloseSelectionWindow(); + this.DOMSearchField().focus(); + } + return false; + } + + // --------- Actions + + // Closes the results window. + this.CloseResultsWindow = function() + { + this.DOMPopupSearchResultsWindow().style.display = 'none'; + this.DOMSearchClose().style.display = 'none'; + this.Activate(false); + } + + this.CloseSelectionWindow = function() + { + this.DOMSearchSelectWindow().style.display = 'none'; + } + + // Performs a search. + this.Search = function() + { + this.keyTimeout = 0; + + // strip leading whitespace + var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); + + var code = searchValue.toLowerCase().charCodeAt(0); + var idxChar = searchValue.substr(0, 1).toLowerCase(); + if ( 0xD800 <= code && code <= 0xDBFF && searchValue > 1) // surrogate pair + { + idxChar = searchValue.substr(0, 2); + } + + var resultsPage; + var resultsPageWithSearch; + var hasResultsPage; + + var idx = indexSectionsWithContent[this.searchIndex].indexOf(idxChar); + if (idx!=-1) + { + var hexCode=idx.toString(16); + resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html'; + resultsPageWithSearch = resultsPage+'?'+escape(searchValue); + hasResultsPage = true; + } + else // nothing available for this search term + { + resultsPage = this.resultsPath + '/nomatches.html'; + resultsPageWithSearch = resultsPage; + hasResultsPage = false; + } + + window.frames.MSearchResults.location = resultsPageWithSearch; + var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow(); + + if (domPopupSearchResultsWindow.style.display!='block') + { + var domSearchBox = this.DOMSearchBox(); + this.DOMSearchClose().style.display = 'inline'; + if (this.insideFrame) + { + var 
domPopupSearchResults = this.DOMPopupSearchResults(); + domPopupSearchResultsWindow.style.position = 'relative'; + domPopupSearchResultsWindow.style.display = 'block'; + var width = document.body.clientWidth - 8; // the -8 is for IE :-( + domPopupSearchResultsWindow.style.width = width + 'px'; + domPopupSearchResults.style.width = width + 'px'; + } + else + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + var left = getXPos(domSearchBox) + 150; // domSearchBox.offsetWidth; + var top = getYPos(domSearchBox) + 20; // domSearchBox.offsetHeight + 1; + domPopupSearchResultsWindow.style.display = 'block'; + left -= domPopupSearchResults.offsetWidth; + domPopupSearchResultsWindow.style.top = top + 'px'; + domPopupSearchResultsWindow.style.left = left + 'px'; + } + } + + this.lastSearchValue = searchValue; + this.lastResultsPage = resultsPage; + } + + // -------- Activation Functions + + // Activates or deactivates the search panel, resetting things to + // their default values if necessary. + this.Activate = function(isActive) + { + if (isActive || // open it + this.DOMPopupSearchResultsWindow().style.display == 'block' + ) + { + this.DOMSearchBox().className = 'MSearchBoxActive'; + + var searchField = this.DOMSearchField(); + + if (searchField.value == this.searchLabel) // clear "Search" term upon entry + { + searchField.value = ''; + this.searchActive = true; + } + } + else if (!isActive) // directly remove the panel + { + this.DOMSearchBox().className = 'MSearchBoxInactive'; + this.DOMSearchField().value = this.searchLabel; + this.searchActive = false; + this.lastSearchValue = '' + this.lastResultsPage = ''; + } + } +} + +// ----------------------------------------------------------------------- + +// The class that handles everything on the search results page. +function SearchResults(name) +{ + // The number of matches from the last run of . 
+ this.lastMatchCount = 0; + this.lastKey = 0; + this.repeatOn = false; + + // Toggles the visibility of the passed element ID. + this.FindChildElement = function(id) + { + var parentElement = document.getElementById(id); + var element = parentElement.firstChild; + + while (element && element!=parentElement) + { + if (element.nodeName == 'DIV' && element.className == 'SRChildren') + { + return element; + } + + if (element.nodeName == 'DIV' && element.hasChildNodes()) + { + element = element.firstChild; + } + else if (element.nextSibling) + { + element = element.nextSibling; + } + else + { + do + { + element = element.parentNode; + } + while (element && element!=parentElement && !element.nextSibling); + + if (element && element!=parentElement) + { + element = element.nextSibling; + } + } + } + } + + this.Toggle = function(id) + { + var element = this.FindChildElement(id); + if (element) + { + if (element.style.display == 'block') + { + element.style.display = 'none'; + } + else + { + element.style.display = 'block'; + } + } + } + + // Searches for the passed string. If there is no parameter, + // it takes it from the URL query. + // + // Always returns true, since other documents may try to call it + // and that may or may not be possible. + this.Search = function(search) + { + if (!search) // get search word from URL + { + search = window.location.search; + search = search.substring(1); // Remove the leading '?' 
+ search = unescape(search); + } + + search = search.replace(/^ +/, ""); // strip leading spaces + search = search.replace(/ +$/, ""); // strip trailing spaces + search = search.toLowerCase(); + search = convertToId(search); + + var resultRows = document.getElementsByTagName("div"); + var matches = 0; + + var i = 0; + while (i < resultRows.length) + { + var row = resultRows.item(i); + if (row.className == "SRResult") + { + var rowMatchName = row.id.toLowerCase(); + rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); // strip 'sr123_' + + if (search.length<=rowMatchName.length && + rowMatchName.substr(0, search.length)==search) + { + row.style.display = 'block'; + matches++; + } + else + { + row.style.display = 'none'; + } + } + i++; + } + document.getElementById("Searching").style.display='none'; + if (matches == 0) // no results + { + document.getElementById("NoMatches").style.display='block'; + } + else // at least one result + { + document.getElementById("NoMatches").style.display='none'; + } + this.lastMatchCount = matches; + return true; + } + + // return the first item with index index or higher that is visible + this.NavNext = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index++; + } + return focusItem; + } + + this.NavPrev = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index--; + } + return focusItem; + } + + this.ProcessKeys = function(e) + { + if (e.type == "keydown") + { + this.repeatOn = false; + this.lastKey = e.keyCode; + } + else if (e.type == 
"keypress") + { + if (!this.repeatOn) + { + if (this.lastKey) this.repeatOn = true; + return false; // ignore first keypress after keydown + } + } + else if (e.type == "keyup") + { + this.lastKey = 0; + this.repeatOn = false; + } + return this.lastKey!=0; + } + + this.Nav = function(evt,itemIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + var newIndex = itemIndex-1; + var focusItem = this.NavPrev(newIndex); + if (focusItem) + { + var child = this.FindChildElement(focusItem.parentNode.parentNode.id); + if (child && child.style.display == 'block') // children visible + { + var n=0; + var tmpElem; + while (1) // search for last child + { + tmpElem = document.getElementById('Item'+newIndex+'_c'+n); + if (tmpElem) + { + focusItem = tmpElem; + } + else // found it! + { + break; + } + n++; + } + } + } + if (focusItem) + { + focusItem.focus(); + } + else // return focus to search field + { + parent.document.getElementById("MSearchField").focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = itemIndex+1; + var focusItem; + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem && elem.style.display == 'block') // children visible + { + focusItem = document.getElementById('Item'+itemIndex+'_c0'); + } + if (!focusItem) focusItem = this.NavNext(newIndex); + if (focusItem) focusItem.focus(); + } + else if (this.lastKey==39) // Right + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'block'; + } + else if (this.lastKey==37) // Left + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'none'; + } + else if (this.lastKey==27) // Escape + { + 
parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } + + this.NavChild = function(evt,itemIndex,childIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + if (childIndex>0) + { + var newIndex = childIndex-1; + document.getElementById('Item'+itemIndex+'_c'+newIndex).focus(); + } + else // already at first child, jump to parent + { + document.getElementById('Item'+itemIndex).focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = childIndex+1; + var elem = document.getElementById('Item'+itemIndex+'_c'+newIndex); + if (!elem) // last child, jump to parent next parent + { + elem = this.NavNext(itemIndex+1); + } + if (elem) + { + elem.focus(); + } + } + else if (this.lastKey==27) // Escape + { + parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } +} + +function setKeyActions(elem,action) +{ + elem.setAttribute('onkeydown',action); + elem.setAttribute('onkeypress',action); + elem.setAttribute('onkeyup',action); +} + +function setClassAttr(elem,attr) +{ + elem.setAttribute('class',attr); + elem.setAttribute('className',attr); +} + +function createResults() +{ + var results = document.getElementById("SRResults"); + for (var e=0; ek7RCwB~R6VQOP#AvB$vH7i{6H{96zot$7cZT<7246EF5Np6N}+$IbiG6W zg#87A+NFaX+=_^xM1#gCtshC=E{%9^uQX_%?YwXvo{#q&MnpJ8uh(O?ZRc&~_1%^SsPxG@rfElJg-?U zm!Cz-IOn(qJP3kDp-^~qt+FGbl=5jNli^Wj_xIBG{Rc0en{!oFvyoNC7{V~T8}b>| z=jL2WIReZzX(YN(_9fV;BBD$VXQIxNasAL8ATvEu822WQ%mvv4FO#qs` BFGc_W literal 0 HcmV?d00001 diff --git a/docs/generated-html/search/search_r.png b/docs/generated-html/search/search_r.png new file mode 100644 index 
0000000000000000000000000000000000000000..97ee8b439687084201b79c6f776a41f495c6392a GIT binary patch literal 612 zcmV-q0-ODbP)PbXFRCwB?)W514K@j&X?z2*SxFI6-@HT2E2K=9X9%Pb zEK*!TBw&g(DMC;|A)uGlRkOS9vd-?zNs%bR4d$w+ox_iFnE8fvIvv7^5<(>Te12Li z7C)9srCzmK{ZcNM{YIl9j{DePFgOWiS%xG@5CnnnJa4nvY<^glbz7^|-ZY!dUkAwd z{gaTC@_>b5h~;ug#R0wRL0>o5!hxm*s0VW?8dr}O#zXTRTnrQm_Z7z1Mrnx>&p zD4qifUjzLvbVVWi?l?rUzwt^sdb~d!f_LEhsRVIXZtQ=qSxuxqm zEX#tf>$?M_Y1-LSDT)HqG?`%-%ZpY!#{N!rcNIiL;G7F0`l?)mNGTD9;f9F5Up3Kg zw}a<-JylhG&;=!>B+fZaCX+?C+kHYrP%c?X2!Zu_olK|GcS4A70HEy;vn)I0>0kLH z`jc(WIaaHc7!HS@f*^R^Znx8W=_jIl2oWJoQ*h1^$FX!>*PqR1J8k|fw}w_y}TpE>7m8DqDO<3z`OzXt$ccSejbEZCg@0000 + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_0.js b/docs/generated-html/search/typedefs_0.js new file mode 100644 index 0000000000..fc2d59d1a1 --- /dev/null +++ b/docs/generated-html/search/typedefs_0.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['accesstype',['AccessType',['../structcutlass_1_1FragmentIterator.html#a012c5af3a8a40843c576c55ecbc663e7',1,'cutlass::FragmentIterator::AccessType()'],['../structcutlass_1_1FragmentConstIterator.html#addf5c21444f129211eefe7cdca6dfa1b',1,'cutlass::FragmentConstIterator::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html#a0b656c41b9fff6402f33e95204ce8860',1,'cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html#a7eccab04c8d3968e74486d0525a3fa02',1,'cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html#abca5165caae7304f33fcad267c16b002',1,'cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType()'],['../structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html#a87d46956aa317f06f2ba9a535fdfc5da',1,'cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride 
>::AccessType()'],['../structcutlass_1_1Load.html#ad0bf2da0c240f3a2a3f4c92162d347ae',1,'cutlass::Load::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a5d7ed0abaeea99ec3399f8eea930f761',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#a2b9faed8d92f55a46e313d79d214316d',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#a8611550c045d6def964d9dafb2be80c6',1,'cutlass::Load< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#a942970f88e13c88f496a9da67ed47a6f',1,'cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store.html#a8d2f927b2b61987dcea40e84f4575942',1,'cutlass::Store::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html#a89f329ba11f96ee3ce4428cbc792ac3d',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html#ac0af6ae18137156abe24d6479232b955',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType()'],['../structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html#ad073f5e8252ad24b086f14bd2a109cf9',1,'cutlass::Store< double, 2, Memory_, true, 16 >::AccessType()'],['../structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html#aeb70e4859e2795b6af63ad5e203b4da9',1,'cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 
>::AccessType()'],['../structcutlass_1_1TileIteratorBase.html#abb3dde23971ad35a477b75ee99381b53',1,'cutlass::TileIteratorBase::AccessType()'],['../structcutlass_1_1TileLoadIterator.html#a4af8eeabe7c1ec0362782687a84466e0',1,'cutlass::TileLoadIterator::AccessType()'],['../structcutlass_1_1TileStoreIterator.html#a0e79ed59263ebc3478c43f2f9a50cb5a',1,'cutlass::TileStoreIterator::AccessType()']]], + ['accumulators',['Accumulators',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#afe6bebd94e3379c94054d04c5196edce',1,'cutlass::gemm::GemmEpilogue::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af7ff579ccb4269bfa5e9ae297260f7a2',1,'cutlass::gemm::GemmEpilogueTraits::Accumulators()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a2fadb0ad2e28109ccfa9195e817a4d54',1,'cutlass::gemm::GemmConfig::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a505306c2af2059f6e84ba32d701d1602',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a4712650b46b6183ea60d79ef18f55b86',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a760a5262f419b789540e7bbb2fda4b9d',1,'cutlass::gemm::ThreadMultiplyAdd::Accumulators()']]], + ['accumulatorsperthread',['AccumulatorsPerThread',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a98d0f84730551eaabfe7404b36478b50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half 
>::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a47807c9c9fb43e7f7b5f409a49986c30',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a002b1944b25cc8fe0862f40a8c8555c5',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerThread()']]], + ['accumulatorsperwarp',['AccumulatorsPerWarp',['../structcutlass_1_1gemm_1_1GemmConfig.html#a51d583dfcd645ad0ecfc23b87b3c5108',1,'cutlass::gemm::GemmConfig::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af0c856abdd9f7f26f671493cc629bf0a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a327ce1b7b6478c27c80baf5d9e26bdbc',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#aa83190df3c1639b6dd632cd4b9278d77',1,'cutlass::gemm::ThreadMultiplyAdd::AccumulatorsPerWarp()']]] +]; diff --git a/docs/generated-html/search/typedefs_1.html b/docs/generated-html/search/typedefs_1.html new file mode 100644 index 0000000000..7af807db41 --- /dev/null +++ b/docs/generated-html/search/typedefs_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_1.js b/docs/generated-html/search/typedefs_1.js new file mode 100644 index 0000000000..e1e869f3d4 --- /dev/null +++ b/docs/generated-html/search/typedefs_1.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['base',['Base',['../structcutlass_1_1gemm_1_1GlobalLoadStream.html#a507f825824e624d80a34ea9395934160',1,'cutlass::gemm::GlobalLoadStream::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a581b7cdeef3e620f246923fa07f9db5a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ae13e0d30a941e16875f196b4844b03ed',1,'cutlass::gemm::GemmGlobalIteratorAb::Base()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a8f8fbb65070589769468c6b1ac6ba7a5',1,'cutlass::gemm::GemmGlobalIteratorCd::Base()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ac0c372c24c4c5340153b11edab874741',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7ec19bf90207a7f598f2ec5166649495',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aca63ec1099444c555299dc144282dded',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a4b23ba8c14e26672a516aa43063250c2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue.html#a07f9a934f04610db41aa1aac2f4cdf04',1,'cutlass::gemm::IgemmEpilogue::Base()'],['../structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html#a98b415dbe6f7b6cb0c41a4e6b3ad5abf',1,'cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true 
>::Base()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#ab19f72d239f639f261fbb63f72f10acf',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#affd04d88a0bbef13c54f10000a5dc15d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aef7047c6a0d0c3db0bfb6bec08520aad',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#a194aa2762885c3d556a84ff410200b86',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Base()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a48a8eda430139e6a131654a54bbf0f3b',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Base()'],['../classcutlass_1_1TensorView.html#a27f09c55f879410cceb75eb25fe542d4',1,'cutlass::TensorView::Base()'],['../structcutlass_1_1TileLoadIterator.html#a1bc1bd4893c14b313ee71b71db2903f3',1,'cutlass::TileLoadIterator::Base()'],['../structcutlass_1_1TileStoreIterator.html#af4576dca736bab8ac73b308522cb4a67',1,'cutlass::TileStoreIterator::Base()']]], + ['baseparams',['BaseParams',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a09268125f1e323874f6c12b50185c517',1,'cutlass::gemm::GemmGlobalIteratorAb::BaseParams()'],['../structcutlass_1_1TileLoadIterator.html#a788bab4fa46dc26854348b751cf1cc76',1,'cutlass::TileLoadIterator::BaseParams()'],['../structcutlass_1_1TileStoreIterator.html#a5484b46ac2646edb7a185b51137f70c0',1,'cutlass::TileStoreIterator::BaseParams()']]], + ['blockswizzle',['BlockSwizzle',['../structcutlass_1_1gemm_1_1GemmTraits.html#a50672b5fa67d858aeff8f254cf28e941',1,'cutlass::gemm::GemmTraits']]] +]; diff --git a/docs/generated-html/search/typedefs_10.html b/docs/generated-html/search/typedefs_10.html new 
file mode 100644 index 0000000000..d8e5942ad2 --- /dev/null +++ b/docs/generated-html/search/typedefs_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_10.js b/docs/generated-html/search/typedefs_10.js new file mode 100644 index 0000000000..e488958fd2 --- /dev/null +++ b/docs/generated-html/search/typedefs_10.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['warps',['Warps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaff4a5e0f9e4256f184a22cad0ce8cf4',1,'cutlass::gemm::GemmSharedLoadTileATraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7ad7a4e33ed43926e165e66162eb620b',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af4597927405d8bb1ad2c464fad064703',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4764f70691cb3fee91ce47653363aa4f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Warps()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#abb6ba58a2f2d80db0b2c9c1d88454efd',1,'cutlass::gemm::GemmConfig::Warps()']]] +]; diff --git a/docs/generated-html/search/typedefs_11.html b/docs/generated-html/search/typedefs_11.html new file mode 100644 index 0000000000..3f37b89032 --- /dev/null +++ b/docs/generated-html/search/typedefs_11.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_11.js b/docs/generated-html/search/typedefs_11.js new file mode 100644 index 0000000000..d8526488a7 --- /dev/null +++ b/docs/generated-html/search/typedefs_11.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['yes',['yes',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac1cf3f804e7686213fd42c678cc6d669',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/typedefs_2.html b/docs/generated-html/search/typedefs_2.html new file mode 100644 index 0000000000..745d076c80 --- /dev/null +++ b/docs/generated-html/search/typedefs_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_2.js b/docs/generated-html/search/typedefs_2.js new file mode 100644 index 0000000000..d216b10908 --- /dev/null +++ b/docs/generated-html/search/typedefs_2.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['clearaccumulators',['ClearAccumulators',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae1cf7988c9cff79a2c3252aaf91fc165',1,'cutlass::gemm::GemmTraits::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#aba2366bec386c74df47dfd0426b07041',1,'cutlass::gemm::HgemmTraitsHelper::ClearAccumulators()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5645e18de29a84c9a9b3f3105966f0c5',1,'cutlass::gemm::IgemmTraitsHelper::ClearAccumulators()']]], + ['consttensorref_5ft',['ConstTensorRef_t',['../classcutlass_1_1TensorView.html#a8ef76170bc5ba832dc01339133021830',1,'cutlass::TensorView']]], + ['coord_5ft',['Coord_t',['../classcutlass_1_1TensorView.html#a4037baf5069138ec3967810d2e185017',1,'cutlass::TensorView']]] +]; diff --git a/docs/generated-html/search/typedefs_3.html b/docs/generated-html/search/typedefs_3.html new file mode 100644 index 0000000000..def60a5bec --- /dev/null +++ b/docs/generated-html/search/typedefs_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_3.js b/docs/generated-html/search/typedefs_3.js new file mode 100644 index 0000000000..1c82be67fc --- /dev/null +++ b/docs/generated-html/search/typedefs_3.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['deleter_5ftype',['deleter_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a85cab9945c36dc56bd7d6adf30c0d252',1,'cutlass::platform::unique_ptr']]], + ['delta',['Delta',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#af1f105d4712f01880b0944666e2f81ae',1,'cutlass::gemm::GemmEpilogueTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aede069e51e0732a9648c437261bd4d66',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a07bb48f99000256f04f00564a4371c2f',1,'cutlass::gemm::GemmGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#aba61fb6e93a6423ab72c082c280f5db4',1,'cutlass::gemm::GemmGlobalTileCdTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a645f65f7d8f123936b286521df470224',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#afd691b764b7d105a1ed41dada6049e71',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a2ee87510d2deccf8b9633aaa4f6340ea',1,'cutlass::gemm::GemmSharedLoadTileATraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ad029d098ba13543bf99c728e6b93006d',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a5587ef22f419ab9a7c6117917cc99c57',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#ac5578da2577cddd5a38cb628f894f644',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Delta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html
#a8f8de5a6811b77f0c721cd78a237223e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aed055504ec5f09657e059416150188a9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Delta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a08dada072eefded4c859df4e5fc25ca6',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Delta()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html#ab55665f7c2f2cb8b8b9b8ac852d48002',1,'cutlass::gemm::WmmaGemmGlobalIteratorCdTraits::Delta()'],['../structcutlass_1_1TileTraits.html#af88f5cea9f452d83004ea0fa0f9d56eb',1,'cutlass::TileTraits::Delta()'],['../structcutlass_1_1TileIteratorBase.html#a9bc6c04f4a3adeb5a29743fa43425088',1,'cutlass::TileIteratorBase::Delta()'],['../structcutlass_1_1TileLoadIterator.html#ac2a7f94723259f0d3c7b8a6d5b8778bf',1,'cutlass::TileLoadIterator::Delta()'],['../structcutlass_1_1TileStoreIterator.html#a1c433ba0eea5e6a46f36101d8de98ed0',1,'cutlass::TileStoreIterator::Delta()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a47404b4527b101e286347714aea687d5',1,'cutlass::TileTraitsStrideMajor::Delta()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#ab1a4945bf562debeee1af813288e5896',1,'cutlass::TileTraitsContiguousMajor::Delta()'],['../structcutlass_1_1TileTraitsWarpRake.html#a3ce218b223c5716af40c316899324bbe',1,'cutlass::TileTraitsWarpRake::Delta()']]] +]; diff --git a/docs/generated-html/search/typedefs_4.html b/docs/generated-html/search/typedefs_4.html new file mode 100644 index 0000000000..ef733ad276 --- /dev/null +++ b/docs/generated-html/search/typedefs_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_4.js b/docs/generated-html/search/typedefs_4.js new file mode 100644 index 0000000000..6415af3376 --- /dev/null +++ b/docs/generated-html/search/typedefs_4.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['element',['Element',['../structcutlass_1_1Fragment.html#a9c67fa5bbd0b8b49bd6ec002dee3cbab',1,'cutlass::Fragment::Element()'],['../structcutlass_1_1FragmentIterator.html#ab4ef3c5a6b5e13224e45bbbcb9f1bc5d',1,'cutlass::FragmentIterator::Element()'],['../structcutlass_1_1FragmentConstIterator.html#ae98ab2a88342e7dbf9631cfb5cf5e706',1,'cutlass::FragmentConstIterator::Element()']]], + ['element_5ftype',['element_type',['../classcutlass_1_1platform_1_1unique__ptr.html#a94cea0ebf2ac4bec69dfa1f80ea07d50',1,'cutlass::platform::unique_ptr']]], + ['epilogue',['Epilogue',['../structcutlass_1_1gemm_1_1GemmTraits.html#a424f1ac14e1e7ad37428edd0cf13e7fe',1,'cutlass::gemm::GemmTraits::Epilogue()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a234ae6065d5ab56135e10119d3ad2d98',1,'cutlass::gemm::HgemmTraitsHelper::Epilogue()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a5e2ed697a9091a1ca8b19855b5a2c651',1,'cutlass::gemm::IgemmTraitsHelper::Epilogue()']]] +]; diff --git a/docs/generated-html/search/typedefs_5.html b/docs/generated-html/search/typedefs_5.html new file mode 100644 index 0000000000..94db6d21e5 --- /dev/null +++ b/docs/generated-html/search/typedefs_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_5.js b/docs/generated-html/search/typedefs_5.js new file mode 100644 index 0000000000..84d69abc40 --- /dev/null +++ b/docs/generated-html/search/typedefs_5.js @@ -0,0 +1,14 @@ +var searchData= +[ + ['false_5ftype',['false_type',['../namespacecutlass_1_1platform.html#ad8c95b2109070847b13d355120344380',1,'cutlass::platform']]], + ['fetchedfragment',['FetchedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0a7f6ae85cfb162b1facf24dff8bab36',1,'cutlass::gemm::GlobalLoadStreamBase::FetchedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a41b45085f17532a6394de3f5ccf201e7',1,'cutlass::gemm::SharedLoadStream::FetchedFragment()']]], + ['fragment',['Fragment',['../structcutlass_1_1FragmentIterator.html#afd15cbe1c9a0fd7871b12f3f3042c808',1,'cutlass::FragmentIterator::Fragment()'],['../structcutlass_1_1FragmentConstIterator.html#acac5b62b365f36f370adb0fee11cea05',1,'cutlass::FragmentConstIterator::Fragment()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a32687e2aa49dfa251eab14d5cd2036be',1,'cutlass::gemm::GlobalLoadStreamBase::Fragment()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2180cfbb482d300472ad2993e4b555d4',1,'cutlass::gemm::GemmGlobalIteratorAb::Fragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a9f025ed2609bf33230f6a390c22b11b7',1,'cutlass::gemm::SharedLoadStream::Fragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a82dc6d9a10de7aba9a69e6025b2cc2b7',1,'cutlass::gemm::HgemmSwizzle::Fragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a67693ee79f93cb61fc37f2e632eaea8d',1,'cutlass::gemm::IgemmSwizzle::Fragment()'],['../structcutlass_1_1TileIteratorBase.html#a0d7b595d7959cc1680fc07c2e02e1c8e',1,'cutlass::TileIteratorBase::Fragment()'],['../structcutlass_1_1TileLoadIterator.html#aaf72c4897641080b1d84c0bbd8d813cc',1,'cutlass::TileLoadIterator::Fragment()'],['../structcutlass_1_1TileStoreIterator.html#a95da23108b74ad085024ab45e840
83e1',1,'cutlass::TileStoreIterator::Fragment()']]], + ['fragmenta',['FragmentA',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a1daf96b6d152c5cf32f248bbfd605b74',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a71aadbb130d4b1a6532c45282b37354f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a69d387d932b628dc51c18fcc178c4914',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentA()']]], + ['fragmentb',['FragmentB',['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ae79e7fc5be2f4c8d30ca83edc151f63a',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a43e278686b493d0aef943f32a9f47b9e',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a5429a730a1dea00dc4aecbe8e3ef1620',1,'cutlass::gemm::ThreadMultiplyAdd::FragmentB()']]], + 
['fragmentconstiterator',['FragmentConstIterator',['../structcutlass_1_1TileIteratorBase.html#a25a241bbdc0b0121992019a16f1a6d60',1,'cutlass::TileIteratorBase::FragmentConstIterator()'],['../structcutlass_1_1TileLoadIterator.html#a4c7a3a4917245de8269b74bdabe16b76',1,'cutlass::TileLoadIterator::FragmentConstIterator()'],['../structcutlass_1_1TileStoreIterator.html#a48de0db7ee2ee9699b946a9d5a0364c7',1,'cutlass::TileStoreIterator::FragmentConstIterator()']]], + ['fragmentelement',['FragmentElement',['../structcutlass_1_1TileIteratorBase.html#ac7cca14d54bf3f0749db1ffaea7c9ae7',1,'cutlass::TileIteratorBase::FragmentElement()'],['../structcutlass_1_1TileLoadIterator.html#a2edd89863b8035137ccd8dd3ad7be464',1,'cutlass::TileLoadIterator::FragmentElement()'],['../structcutlass_1_1TileStoreIterator.html#a2b13136a970fae187fcb377c9be28fac',1,'cutlass::TileStoreIterator::FragmentElement()']]], + ['fragmentiterator',['FragmentIterator',['../structcutlass_1_1TileIteratorBase.html#a379a52ed1128fc9f93cad35d3e3233e5',1,'cutlass::TileIteratorBase::FragmentIterator()'],['../structcutlass_1_1TileLoadIterator.html#aebbe5a0996dcd362caad618e78dc2591',1,'cutlass::TileLoadIterator::FragmentIterator()'],['../structcutlass_1_1TileStoreIterator.html#a0843b2d82422e7178f324a8d3be9d705',1,'cutlass::TileStoreIterator::FragmentIterator()']]], + ['fragmentmultiplyadd',['FragmentMultiplyAdd',['../structcutlass_1_1gemm_1_1LinearScaling.html#aa697d4eaced1ef08247aeb1fcc0f0ea8',1,'cutlass::gemm::LinearScaling']]], + 
['fragmentshape',['FragmentShape',['../structcutlass_1_1FragmentIterator.html#a63ff1767c4923b0a2b6b64487306ed76',1,'cutlass::FragmentIterator::FragmentShape()'],['../structcutlass_1_1FragmentConstIterator.html#a880f12d0cd42cdae7ce6009d2233f577',1,'cutlass::FragmentConstIterator::FragmentShape()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#afe44fedcf24b90c0cf6ac7d1495b89e4',1,'cutlass::gemm::HgemmSwizzle::FragmentShape()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a13a3b052cd8b714471489a9cc4dc7004',1,'cutlass::gemm::IgemmSwizzle::FragmentShape()'],['../structcutlass_1_1TileIteratorBase.html#a14f4b356c9cd320e6e7b451edbf58c24',1,'cutlass::TileIteratorBase::FragmentShape()'],['../structcutlass_1_1TileLoadIterator.html#a7c27a7b0d8593b002eca186c15fdc869',1,'cutlass::TileLoadIterator::FragmentShape()'],['../structcutlass_1_1TileStoreIterator.html#a3b872e85844c9e009fa480a71a829136',1,'cutlass::TileStoreIterator::FragmentShape()']]], + ['functor',['Functor',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a6c30bea1b2a1bd2e981025851d5b12d1',1,'cutlass::gemm::GemmEpilogue::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a7cdb30f17692e8fdb3dd4cf4c0b8e9ee',1,'cutlass::gemm::GemmEpilogueTraits::Functor()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a981134cf87d85aa28570a62d9e878b10',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Functor()']]] +]; diff --git a/docs/generated-html/search/typedefs_6.html b/docs/generated-html/search/typedefs_6.html new file mode 100644 index 0000000000..bda8ea1c5c --- /dev/null +++ b/docs/generated-html/search/typedefs_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_6.js b/docs/generated-html/search/typedefs_6.js new file mode 100644 index 0000000000..84c6585cf2 --- /dev/null +++ b/docs/generated-html/search/typedefs_6.js @@ -0,0 +1,23 @@ +var searchData= +[ + ['gemmconfig',['GemmConfig',['../structcutlass_1_1gemm_1_1GemmTraits.html#a4efe5d156abca056ef8b5334fb574dd5',1,'cutlass::gemm::GemmTraits::GemmConfig()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1597c776238f35bcb1acc0a8f8f9c118',1,'cutlass::gemm::HgemmTraitsHelper::GemmConfig()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#af10aebe7ca4e24cce435ac4cd60e7bac',1,'cutlass::gemm::IgemmTraitsHelper::GemmConfig()']]], + ['gemmepiloguetraits',['GemmEpilogueTraits',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a4a0f361b5c47d0ab5f3308cd3b3b6ef6',1,'cutlass::gemm::HgemmTraitsHelper']]], + ['gemmtiletraitshelpera',['GemmTileTraitsHelperA',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a5557c86a530f5d20a35d3fa620adf417',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ab9e10d54c81a359db0eba58a11b9a0cf',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperA()']]], + ['gemmtiletraitshelperb',['GemmTileTraitsHelperB',['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8768c2b03bea0c3601c47dde2bc7ca89',1,'cutlass::gemm::HgemmTraitsHelper::GemmTileTraitsHelperB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a095505bfcea6791accd06bf4d37b9df8',1,'cutlass::gemm::IgemmTraitsHelper::GemmTileTraitsHelperB()']]], + ['globalfragmentc',['GlobalFragmentC',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad8e5337f3d19437e9c4cafcfcc3e3d3e',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['globalfragmentd',['GlobalFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a723cd69ee4d5c26579b36e02c531ea88',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + 
['globaliterator',['GlobalIterator',['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a56d3f2606f9464ec57aa61aae378c642',1,'cutlass::gemm::HgemmSwizzle::GlobalIterator()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a880878914c25db44a1781725c24af514',1,'cutlass::gemm::IgemmSwizzle::GlobalIterator()']]], + ['globalloaditeratora',['GlobalLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a5687850f235d644a4820851880740d27',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ab8a3def34300afb5745453d0b33204aa',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ac7ee33e683e48511a1a220df6c9d4758',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorA()']]], + ['globalloaditeratorb',['GlobalLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a362794738bc14b283a91558bcadbbfd5',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a95559f28cab076da723e4cb24351116e',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a3a6d816852cca926afa08103f754477b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadIteratorB()']]], + 
['globalloaditeratorc',['GlobalLoadIteratorC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aecb5429363c7156ee3ad596fe250120a',1,'cutlass::gemm::GemmEpilogue::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a8409d84ee282a4d6953bd41149d8b9c2',1,'cutlass::gemm::GemmEpilogueTraits::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aeea13630bb281834b717f8d9d13a9319',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadIteratorC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a24826f99d097eea0298e6be12a6327b9',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadIteratorC()']]], + ['globalloadstreama',['GlobalLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a9cd6c3fddfb4315eb52b672900462c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a448c242880183e006b70d839d210a2ec',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a2aaece6093100c71c4d587994200e3bb',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a7fb1354154f303642da72e6fd157d846',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamA()']]], + ['globalloadstreamb',['GlobalLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac393b07e780629fc8254fc22cc6f815b',1,'cutlass::gemm::GemmTraits::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aad467ed9a680b4d77acecb096799cd89',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abaf5f16ab0b215b406766ecadab29394',1,'cutlass::gemm::HgemmTraitsHelper::GlobalLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a88e66ee760aea03687e7b3ccc6ea535b',1,'cutlass::gemm::IgemmTraitsHelper::GlobalLoadStreamB()']]], + 
['globalloadtiletraits',['GlobalLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a94f00f94a88588522ca3f9f0197a5a9b',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#aaa009025dcd6360ead1dc18005688821',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalLoadTileTraits()']]], + ['globalstoreiteratord',['GlobalStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a1c766374d900535c944cf2a2de6925f4',1,'cutlass::gemm::GemmEpilogue::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aeef5745d149770c9f79e12f6d97ffce1',1,'cutlass::gemm::GemmEpilogueTraits::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a23be7b4b498c17f9235a2b4896f1bffb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad3e937c15bfac443b0e3b94d702f46b2',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreIteratorD()']]], + ['globalstoretiletraits',['GlobalStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a16d7df2934c3c59d9b8f36f7a2137aee',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a16b06a1611dbd22adaa0c9ee5e1b15bd',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalStoreTileTraits()']]], + ['globaltiletraits',['GlobalTileTraits',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#adc95f4a8617cdf28e5b5d7d2d1aefec2',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a8160a260acce2362e90d43bce733c69d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a5fee0ed52326c0685e8d8295e40ce064',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afbc41e7b98097b153fd27a48f073a877',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a36e082b2da22d17eeb73af6bd0632314',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1e6356bf5c87271ab9794fcc79edc145',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a738774d1eb79de7e29c372ddfd48258d',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a24f38105e3c331c733cb672c3a9be588',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits()']]], + 
['globaltransformera',['GlobalTransformerA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#af9a98d39d6959a9641f7c3c90df2f98e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a3fb86b6d3e353df6b752510d64c5e647',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a23bb732b7237bcabe3667408f288844d',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerA()']]], + ['globaltransformerb',['GlobalTransformerB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a437070ba4a214aee363315d6019e450c',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7b4de712868095200a338802c1fbb3de',1,'cutlass::gemm::HgemmTraitsHelper::GlobalTransformerB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a600bcc571ea5e04a98663c134d4664b9',1,'cutlass::gemm::IgemmTraitsHelper::GlobalTransformerB()']]], + ['globaltransformerc',['GlobalTransformerC',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a41edfd24b7dd2759f8b72ae8534182a9',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a051f25a4aa3ea71ff400582228adbdaa',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a0682b61d1a1a951026ff026bff9361bb',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerC()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad0116b2e7b2ca1526246e2ff7e73fd2f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerC()']]], + 
['globaltransformerd',['GlobalTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a32f618ff19d984447fba7355d46a69a7',1,'cutlass::gemm::GemmEpilogue::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a261e526c6a8e832bc483bf4e486cc9d7',1,'cutlass::gemm::GemmEpilogueTraits::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae96c5a3d58dc7a95543f8749f762ca43',1,'cutlass::gemm::GemmEpilogueTraitsHelper::GlobalTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a880293ef6a48a0f4941c8f984c36f591',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::GlobalTransformerD()']]] +]; diff --git a/docs/generated-html/search/typedefs_7.html b/docs/generated-html/search/typedefs_7.html new file mode 100644 index 0000000000..565b233f1d --- /dev/null +++ b/docs/generated-html/search/typedefs_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_7.js b/docs/generated-html/search/typedefs_7.js new file mode 100644 index 0000000000..ec92354833 --- /dev/null +++ b/docs/generated-html/search/typedefs_7.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['igemmconfig',['IgemmConfig',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5a52727bb9b5d5f8afa7d0384f564036',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['immediateoffsetstrides',['ImmediateOffsetStrides',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#abc47717230ddde3edc88d2770f6841bf',1,'cutlass::gemm::GemmGlobalTileTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a14e9713b0cd34af433c3cae9b283b54c',1,'cutlass::gemm::GemmGlobalTileCdTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a027bebceeda2287b40915ffd95d494a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a39414f484da7f993bc96d61c97273614',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a8e767b5e2fb95b0b02a0ea3e8ea58368',1,'cutlass::gemm::GemmSharedLoadTileATraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a5e4204b52ee081a37e824ca71c291c03',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ac585815d08290d9a5a9cdbd611ffdac4',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9cfb32f902593e7dc018ee802c3520b8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ImmediateOffsetStrides()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af53d49bad7060b87a2761fe8a82a7ddd',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ImmediateOffsetStrides(
)'],['../structcutlass_1_1TileIteratorBase.html#a561ceb1093b28b8dce67df0129b7b8b8',1,'cutlass::TileIteratorBase::ImmediateOffsetStrides()']]], + ['index',['Index',['../structcutlass_1_1gemm_1_1Gemm.html#a0aca711d07245f3071adeb1111fedd34',1,'cutlass::gemm::Gemm::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a07c93d583bfddd8f916fba6ef809832e',1,'cutlass::gemm::GemmEpilogue::Index()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab430d05bd17efd60c28077c87b5ca331',1,'cutlass::gemm::GemmEpilogueTraits::Index()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a6a6e38022606dd8d41cf7264fb059cc2',1,'cutlass::gemm::GlobalLoadStreamBase::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a7ff9cae930c8a6bb9c8ee6d81cb1953f',1,'cutlass::gemm::GemmGlobalIteratorAb::Index()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56847e834b31b88544093c3df54d299f',1,'cutlass::gemm::GemmGlobalIteratorCd::Index()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#ae67227cecbe84f5c8497d9a7ff82b367',1,'cutlass::gemm::GemmTraits::Index()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a3f45216454a550a116935aede0bda3de',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Index()'],['../structcutlass_1_1TileIteratorBase.html#a44665808adfd69df0d26cec4b1840cc3',1,'cutlass::TileIteratorBase::Index()'],['../structcutlass_1_1TileLoadIterator.html#aaa83f05e0cb3204053c3ee1da036cd36',1,'cutlass::TileLoadIterator::Index()'],['../structcutlass_1_1TileStoreIterator.html#a5ac2280dfcac08cec17b8c0db1c4593e',1,'cutlass::TileStoreIterator::Index()']]], + ['inputfragment',['InputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#ac7906301019c3e6d60985c3851f1e95e',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > 
>::InputFragment()'],['../structcutlass_1_1Copy.html#aed254bbc1ad94ed9d335ab02f199ceb1',1,'cutlass::Copy::InputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#ab5fab63d83eb0444c08bda16491d2627',1,'cutlass::gemm::HgemmSwizzle::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#aa9a4b05f9fc28b80a4ae4aabb2ce1e8c',1,'cutlass::gemm::IgemmFloatToInt8Converter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a702ca51abc077355a2d7343976a0cfdb',1,'cutlass::gemm::IgemmInt8ToFloatConverter::InputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#a24a0bd5a9251ba5204b35eb4c4ac7727',1,'cutlass::gemm::IgemmSwizzle::InputFragment()']]], + ['instructionshape',['InstructionShape',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#ac93ba536992debeae86087e638167a13',1,'cutlass::gemm::FragmentMultiplyAdd::InstructionShape()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#ab16a3d8adda89cc4f9765116ea75a4b7',1,'cutlass::gemm::FragmentMultiplyAdd< half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a3a57d05f50932d718538f0d1ededa95b',1,'cutlass::gemm::GemmConfig::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa56cdefa659af5ce4efd493b94bafdfd',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#ad73372a37315b0c17a8db21e40a78574',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ac6381210d447fda9b0e9a028d167f22b',1,'cutlass::gemm::ThreadMultiplyAdd::InstructionShape()']]], + 
['iterations',['Iterations',['../structcutlass_1_1FragmentIterator.html#a4324ae522c6463e66a64f05d2e58b5f0',1,'cutlass::FragmentIterator::Iterations()'],['../structcutlass_1_1FragmentConstIterator.html#a527100e34ed700787b1419157710dbb2',1,'cutlass::FragmentConstIterator::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a8e3c978da6ed56239783bf4db0a936ae',1,'cutlass::gemm::GemmEpilogue::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#ab00969bdda930eeb7b82985c476adf7d',1,'cutlass::gemm::GemmEpilogueTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ad7b23352072b1509d3383ee775756d2a',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aaf6410f99d7f995792d0ac34efd3a82f',1,'cutlass::gemm::GemmGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a72eebc18d31900db57fa77508016f64a',1,'cutlass::gemm::GemmGlobalTileCdTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a6125e052e47296c3ef53c8a149ffd31b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a025445699c5c86237d8c3e48f01081ea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#ae96e490d38ade6db4d853fb6c8f3378b',1,'cutlass::gemm::GemmSharedLoadTileATraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a27bc06b72a94e34d5da6fbfb950459b5',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a6bacc866485330f80596f634e6d14336',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a81ca35e0c5d9553d1dccc981cbd89d47',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Iterations()'],['../structcutlass_1_1gemm_1_1
HgemmCrosswiseGlobalTileTraits.html#aa9b46937bea47d071d277aa212dd610b',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a0b9b2b7838cb13a61a16501a2662fa51',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Iterations()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a9fb4b56091d4458ebd82130bc3951e5b',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Iterations()'],['../structcutlass_1_1PredicateTileAdapter.html#a1f2d52eec9f488c2a53c4d62af824450',1,'cutlass::PredicateTileAdapter::Iterations()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#a5e461e0eb376de60605a6ab5fdc38058',1,'cutlass::ConstPredicateTileAdapter::Iterations()'],['../structcutlass_1_1TileTraits.html#af7ae2fdb4c8f1702169cc7d437d2b469',1,'cutlass::TileTraits::Iterations()'],['../structcutlass_1_1TileIteratorBase.html#a352ed0773b37f03bf68e4b6cf9899474',1,'cutlass::TileIteratorBase::Iterations()'],['../structcutlass_1_1TileLoadIterator.html#a9720b1e4a10c2d5aa85f9a9c66a31bbf',1,'cutlass::TileLoadIterator::Iterations()'],['../structcutlass_1_1TileStoreIterator.html#a552a67fb03c28e985d143f6193f88308',1,'cutlass::TileStoreIterator::Iterations()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a03a32694da75bb95422c6b550e3324e2',1,'cutlass::TileTraitsStrideMajor::Iterations()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a425a20b642ae8736c12626b2de9b8b82',1,'cutlass::TileTraitsContiguousMajor::Iterations()'],['../structcutlass_1_1TileTraitsWarpRake.html#a410e44aa83f2179152a48f7aceb05323',1,'cutlass::TileTraitsWarpRake::Iterations()']]], + ['iterationsstrides',['IterationsStrides',['../structcutlass_1_1FragmentConstIterator.html#ab683796885f3bae3765efd96883f311b',1,'cutlass::FragmentConstIterator']]], + ['iterator',['Iterator',['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a6925270c4ad157554ab155cddc7b46e6',1,'cutlass::gemm::SharedLoadStream']]] +]; diff --git 
a/docs/generated-html/search/typedefs_8.html b/docs/generated-html/search/typedefs_8.html new file mode 100644 index 0000000000..3063e0327b --- /dev/null +++ b/docs/generated-html/search/typedefs_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_8.js b/docs/generated-html/search/typedefs_8.js new file mode 100644 index 0000000000..e54f847b9c --- /dev/null +++ b/docs/generated-html/search/typedefs_8.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['loaditerator',['LoadIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#acff2a1ab180eec672714cd587a28f9fe',1,'cutlass::gemm::GlobalLoadStreamBase']]] +]; diff --git a/docs/generated-html/search/typedefs_9.html b/docs/generated-html/search/typedefs_9.html new file mode 100644 index 0000000000..9c978f7ad0 --- /dev/null +++ b/docs/generated-html/search/typedefs_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_9.js b/docs/generated-html/search/typedefs_9.js new file mode 100644 index 0000000000..76a1247d22 --- /dev/null +++ b/docs/generated-html/search/typedefs_9.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['multiplicandtraits',['MultiplicandTraits',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a21a3524edaf002b5e5878df3c7eae7e7',1,'cutlass::gemm::GemmGlobalTileTraits']]], + ['multiplyadd',['MultiplyAdd',['../structcutlass_1_1gemm_1_1GemmConfig.html#a8669096ddbb8c810fb8d2313d62e6ee7',1,'cutlass::gemm::GemmConfig::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#af810544e956b04830c5be7ce41d3b45c',1,'cutlass::gemm::GemmTraits::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ae9facf63912d98e597883bf7efb56cc8',1,'cutlass::gemm::HgemmTraitsHelper::MultiplyAdd()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a87e34d56fa955670331749724bee9fd8',1,'cutlass::gemm::IgemmTraitsHelper::MultiplyAdd()']]], + ['multiplyaddscalar',['MultiplyAddScalar',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a19fb8c9b9a77aebec507635de7da6f21',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#afac6f7a62b24396ea6861e6fd10779cc',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a42dd312d4cf5bb53b472389897f9deeb',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aad14588b1515e37ede24915f589d32ab',1,'cutlass::gemm::GemmTileTraitsHelperB< 
MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar()']]] +]; diff --git a/docs/generated-html/search/typedefs_a.html b/docs/generated-html/search/typedefs_a.html new file mode 100644 index 0000000000..426df90548 --- /dev/null +++ b/docs/generated-html/search/typedefs_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_a.js b/docs/generated-html/search/typedefs_a.js new file mode 100644 index 0000000000..be59a69a5c --- /dev/null +++ b/docs/generated-html/search/typedefs_a.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['no',['no',['../structcutlass_1_1platform_1_1is__base__of__helper.html#ae096aa6c67f60d8d9c5a4b084118a8af',1,'cutlass::platform::is_base_of_helper']]] +]; diff --git a/docs/generated-html/search/typedefs_b.html b/docs/generated-html/search/typedefs_b.html new file mode 100644 index 0000000000..fe314a1313 --- /dev/null +++ b/docs/generated-html/search/typedefs_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_b.js b/docs/generated-html/search/typedefs_b.js new file mode 100644 index 0000000000..90d2ef437b --- /dev/null +++ b/docs/generated-html/search/typedefs_b.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['offset_5ft',['Offset_t',['../classcutlass_1_1TensorView.html#a215946fb080a5253815feb1f639c8f6f',1,'cutlass::TensorView']]], + ['outputfragment',['OutputFragment',['../structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html#a8ef69ab595489e142911e8e240fb405a',1,'cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment()'],['../structcutlass_1_1Copy.html#a545be6c284d625b0841a10cc9126e14a',1,'cutlass::Copy::OutputFragment()'],['../structcutlass_1_1gemm_1_1HgemmSwizzle.html#a9c04f0b0eb0293325f661b72168d4fa8',1,'cutlass::gemm::HgemmSwizzle::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html#a3d89bfc0d94cd695cbe4a61859e5e553',1,'cutlass::gemm::IgemmFloatToInt8Converter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html#a66ac385a1cd771b95f70ee36cd74e8f7',1,'cutlass::gemm::IgemmInt8ToFloatConverter::OutputFragment()'],['../structcutlass_1_1gemm_1_1IgemmSwizzle.html#ac0a4e31e95f8e0c77ae087284bb02ff8',1,'cutlass::gemm::IgemmSwizzle::OutputFragment()']]], + 
['outputtile',['OutputTile',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a92a135fac401d43a8d2f14982d90274b',1,'cutlass::gemm::GemmEpilogue::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#aed1bd9df5ff579ba3e36ae5ba781c075',1,'cutlass::gemm::GemmEpilogueTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ac30a062bed1a65e45961c4f301b69101',1,'cutlass::gemm::GemmEpilogueTraitsHelper::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ad52b81080731ee1f0d3c2c7eaba6f60d',1,'cutlass::gemm::GemmSharedStoreTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#acb16feebdcad5bbebe9d4d3383c37899',1,'cutlass::gemm::GemmSharedLoadTileDTraits::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a53450f4d7444d6a4c0d2353496c0a4fd',1,'cutlass::gemm::GemmConfig::OutputTile()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a97d7ee63e5d180410b370f095648f367',1,'cutlass::gemm::GemmTraits::OutputTile()']]] +]; diff --git a/docs/generated-html/search/typedefs_c.html b/docs/generated-html/search/typedefs_c.html new file mode 100644 index 0000000000..3a6a4a76c0 --- /dev/null +++ b/docs/generated-html/search/typedefs_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_c.js b/docs/generated-html/search/typedefs_c.js new file mode 100644 index 0000000000..7807c3a1f6 --- /dev/null +++ b/docs/generated-html/search/typedefs_c.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['params',['Params',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ae5209fa80705442693833c63d535161e',1,'cutlass::gemm::GemmEpilogue']]], + ['pointer',['pointer',['../classcutlass_1_1platform_1_1unique__ptr.html#ab6ce60d03d11b269c1e151dfa7c696f9',1,'cutlass::platform::unique_ptr::pointer()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#adcbf24c1b7f45ab5fe8f3ad94154b4d1',1,'cutlass::gemm::GlobalLoadStreamBase::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a3ff6f630b6b317ace1cf6e13fdf3a0cd',1,'cutlass::gemm::GemmGlobalTileTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a3abcfa68ae9904a13195d32d6e6c4bc6',1,'cutlass::gemm::GemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a5be0c995c57faafaad7ae55ae015fc00',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ab883c2a8b90262152faca9cabe515dc4',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#adc4946dfbe914140c6852d0c05b30864',1,'cutlass::gemm::GemmSharedLoadTileATraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afafb3d9ae470c8ef56ec4ca5e66e2182',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a20471c2f569c28538dad8a220ab25624',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1e72b69cf2147e4d194893a64417b920',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a84a73da2a07210fcfa
d10853b941c85e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Pointer()'],['../structcutlass_1_1TileLoadIterator.html#a5a179e148ccd770e1703f288624fa9b8',1,'cutlass::TileLoadIterator::Pointer()']]], + ['predicatevector',['PredicateVector',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a3dd74f6e12339a87c0eb8f75fbdc7b9c',1,'cutlass::gemm::GemmGlobalIteratorAb::PredicateVector()'],['../structcutlass_1_1PredicateTileAdapter.html#a72669300eb0bd18ea8124f780862a0e4',1,'cutlass::PredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1ConstPredicateTileAdapter.html#ab9143288811a1262f7007f1b76b32e8f',1,'cutlass::ConstPredicateTileAdapter::PredicateVector()'],['../structcutlass_1_1TileIteratorBase.html#a7ab46a9210b421d32af4d1394892cfd5',1,'cutlass::TileIteratorBase::PredicateVector()'],['../structcutlass_1_1TileLoadIterator.html#a64ae02b44f275ef2f016949aec769328',1,'cutlass::TileLoadIterator::PredicateVector()'],['../structcutlass_1_1TileStoreIterator.html#a5aa507eaeb63951f8e69fb223ec41809',1,'cutlass::TileStoreIterator::PredicateVector()']]] +]; diff --git a/docs/generated-html/search/typedefs_d.html b/docs/generated-html/search/typedefs_d.html new file mode 100644 index 0000000000..8c3b81fde1 --- /dev/null +++ b/docs/generated-html/search/typedefs_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_d.js b/docs/generated-html/search/typedefs_d.js new file mode 100644 index 0000000000..b573365691 --- /dev/null +++ b/docs/generated-html/search/typedefs_d.js @@ -0,0 +1,31 @@ +var searchData= +[ + ['scalar',['Scalar',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a0d38914bf97084e04102e7897aee4295',1,'cutlass::gemm::GemmEpilogue::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a006e50cf5fb67407d41c60d6d08b8b66',1,'cutlass::gemm::GemmEpilogueTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ae2b82b9b62aefa15005091bb84ac20e8',1,'cutlass::gemm::GemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afbbf15a7b5e4c38e59bf1debf67f04d6',1,'cutlass::gemm::GlobalLoadStreamBase::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a6894b653fffa59bcb847bc3295643d6b',1,'cutlass::gemm::GemmGlobalTileTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a5817b81c7013db9a3f7394ad4b1db79a',1,'cutlass::gemm::GemmGlobalIteratorAb::Scalar()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6b5b207eb1147e9669215e192901df9e',1,'cutlass::gemm::GemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a8b04fd003fc2db46d749360e8838438b',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aaa439a0bb6b9de5e2722ea7b011effea',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a1b6956adc65254202864520b668edd14',1,'cutlass::gemm::GemmSharedLoadTileATraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a2a6065e583155b3e389253d3bfb64d73',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9a2218b570dada2f1e3ccd8004c47856',1,'cutlass::ge
mm::GemmSharedStoreTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a1b025cb056729706f36469e74a9799dc',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af511f0ff83166b2a77d4cad4150c8e8f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ac618881d66790e4c280dc5692e5ddf95',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a8ae7db3f2f0c57779729d500386c004c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a7639ccd7f6419a9f232db173a228e756',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ae4128bba3f1df6ef7824e2db79745b00',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html#ab1068ba72468f9ede1d05ba41ea31317',1,'cutlass::gemm::IgemmEpilogueScalar::Scalar()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html#a0983fd25494f6a7ed5af37a02e99f650',1,'cutlass::gemm::IgemmEpilogueScalar< int 
>::Scalar()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ae6b053ca059932f7c0d3c99243854183',1,'cutlass::gemm::LinearScaling::Scalar()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab9979f3f1f6d31e1466780c5777de25e',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Scalar()'],['../structcutlass_1_1TileIteratorBase.html#a17163e93d7d3616b4950925f72bb4c16',1,'cutlass::TileIteratorBase::Scalar()'],['../structcutlass_1_1TileLoadIterator.html#ae8dff52e619f06fbdbca8cb847c79895',1,'cutlass::TileLoadIterator::Scalar()'],['../structcutlass_1_1TileStoreIterator.html#ad52318b430437575b55099ca992ca3a7',1,'cutlass::TileStoreIterator::Scalar()'],['../unioncutlass_1_1Vector.html#a56875d7cbf921261e68e1f63212db5bd',1,'cutlass::Vector::Scalar()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a03199df1287d263f7267239c014f1d9b',1,'cutlass::Vector< half, kLanes_ >::Scalar()'],['../structcutlass_1_1VectorTraits.html#ab3b49d7fb52050c13e50e3c75bf72599',1,'cutlass::VectorTraits::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aaf35570b10829356762dcec925a5b4bc',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Scalar()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a6e99dde8432b13472971dc41573a574e',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar()']]], + ['scalara',['ScalarA',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a6fa76b3e7ac721d47df47eba4e9ef222',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarA()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#a366083b229b28e7f44da38273b2ab263',1,'cutlass::gemm::FragmentMultiplyAdd< half 
>::ScalarA()'],['../structcutlass_1_1gemm_1_1Gemm.html#a6fcf9daef57558e1bb932c6eba99721b',1,'cutlass::gemm::Gemm::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a9d1e4e364be8fd9de5e1199d93ad76aa',1,'cutlass::gemm::GemmConfig::ScalarA()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a96d64bdc48db4971798b620d6b49b3f6',1,'cutlass::gemm::GemmTraits::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#a236a408791a38358cbadf19dd0e8ed9f',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aeef5fa0437b4ce1c2e8ac4bc7e062b65',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a382242001b4c8e18ea5f2de724902217',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarA()']]], + ['scalarb',['ScalarB',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#af4f5c4a79c447e5aaf313878eca022cb',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarB()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af52ec4b92a3e788169764014aebb85a1',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB()'],['../structcutlass_1_1gemm_1_1Gemm.html#ae6f11bb666c2c8510e99200a2c0fc2f4',1,'cutlass::gemm::Gemm::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#aa13d6f5e5ad907ef09c88ae49e6e8e9b',1,'cutlass::gemm::GemmConfig::ScalarB()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#aa0e8fd28f5247764dfb7843f7670c698',1,'cutlass::gemm::GemmTraits::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#ac7557562de1108bf1abc10829c83e88f',1,'cutlass::gemm::ThreadMultiplyAdd< 
AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#aaf9e4b8b16150a6ad826c228af2bf103',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a42d181e7f4d0d0a15e1c911d3498b767',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarB()']]], + ['scalarc',['ScalarC',['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html#a92c1ffbfb479cd9fa2c2632ef8e347d3',1,'cutlass::gemm::FragmentMultiplyAdd::ScalarC()'],['../structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html#af553be8ef0b4dc9bb593d98dfce8628d',1,'cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC()'],['../structcutlass_1_1gemm_1_1Gemm.html#a71f0c91768a1a87e94030c8c2db51e55',1,'cutlass::gemm::Gemm::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#abb0741601652df8fdf927d49c2c0e4d0',1,'cutlass::gemm::GemmEpilogue::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#abf97949c238d72854225c1c6131b5cbc',1,'cutlass::gemm::GemmEpilogueTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#ad8f262d7da093d07cdd5c6a4fd9aceea',1,'cutlass::gemm::GemmConfig::ScalarC()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a8f78d4a68817760099081523aa7fd443',1,'cutlass::gemm::GemmTraits::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#af1a6d91d4734683ea791bf57f3c3bbb0',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#acdd554e996a712ff62eb70d6ecf8e116',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, 
int8_t, int >::ScalarC()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#a1af758cb98c33060462a2706856b0a01',1,'cutlass::gemm::ThreadMultiplyAdd::ScalarC()']]], + ['scalard',['ScalarD',['../structcutlass_1_1gemm_1_1Gemm.html#ae2aa3663f9f6f5708e816dcf7cd66694',1,'cutlass::gemm::Gemm::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4887b56a96694ce6350db77f78bb505f',1,'cutlass::gemm::GemmEpilogue::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a1ee74d6f89b044578e1cd6dd210ce5fe',1,'cutlass::gemm::GemmEpilogueTraits::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a188ef7f4c49ff2830753218343a1b8f8',1,'cutlass::gemm::GemmConfig::ScalarD()'],['../structcutlass_1_1gemm_1_1GemmTraits.html#a3129be75ee087603170f8367e10e070e',1,'cutlass::gemm::GemmTraits::ScalarD()']]], + ['scalarepilogue',['ScalarEpilogue',['../structcutlass_1_1gemm_1_1Gemm.html#a9349fc5f20215c1c6508e250b0b4e936',1,'cutlass::gemm::Gemm']]], + ['shape',['Shape',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a89f1d9599b418c8bb81c104ca86cf00e',1,'cutlass::gemm::GemmMultiplicandTraits::Shape()'],['../structcutlass_1_1ShapeScale.html#aae9cfc35c517cd89018e4f914acbac29',1,'cutlass::ShapeScale::Shape()'],['../structcutlass_1_1ShapeAdd.html#ad4712a1339445038949445de1dd74e71',1,'cutlass::ShapeAdd::Shape()'],['../structcutlass_1_1ShapeSub.html#a24b6dd8cb6171b85c4e2f37407f9a5c9',1,'cutlass::ShapeSub::Shape()'],['../structcutlass_1_1ShapeMul.html#a8875fc5e861339f981360ed774e8cc94',1,'cutlass::ShapeMul::Shape()'],['../structcutlass_1_1ShapeDiv.html#a108ded386ef6708afc6fe769a77a234b',1,'cutlass::ShapeDiv::Shape()'],['../structcutlass_1_1ShapeMax.html#ad566aceac2563024982eeabb78c6c961',1,'cutlass::ShapeMax::Shape()'],['../structcutlass_1_1ShapeMin.html#a5c813e4c34ea612431d31b36120f8549',1,'cutlass::ShapeMin::Shape()'],['../structcutlass_1_1ShapeStrides.html#ac6fcda9b8e1782f24c1e6d67cd880a6a',1,'cutlass::ShapeStrides::Shape()']]], + 
['sharedloaditeratora',['SharedLoadIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a365aed4c0e2ad1bffea517ee36998557',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a1bbb198a50b5f01a0502df44bb678620',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#aa93043ac87d89ce7fb991c9195c3bf99',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorA()']]], + ['sharedloaditeratorb',['SharedLoadIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a4de905aadc734df69fd0db83f01be56e',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a8d09409973094ca2a17633776a64a303',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a42322b9b10e894fe157e527b378c59f8',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadIteratorB()']]], + ['sharedloaditeratord',['SharedLoadIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a4a0b439f8a57d8e67174ecbd96183070',1,'cutlass::gemm::GemmEpilogue::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a9822fa405b32cc2f471c9fdd37585cb5',1,'cutlass::gemm::GemmEpilogueTraits::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#adbff60de6f90ef4d5ae0c7096692e2c0',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad33ee44527a7fcfd41b4e677927fd4fa',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadIteratorD()']]], + 
['sharedloadstreama',['SharedLoadStreamA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ae01371eb31b88fa83c4926564cecafdc',1,'cutlass::gemm::GemmTraits::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#aa5ebe3a857b55412a86ec65ad1c55dd8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a21c860cc877df13d22dd30eeb5e2b06b',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a70063eb7e19921efef55a6f32562773f',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamA()']]], + ['sharedloadstreamb',['SharedLoadStreamB',['../structcutlass_1_1gemm_1_1GemmTraits.html#acaeb27063a444e2a3b93f3cb70e3c290',1,'cutlass::gemm::GemmTraits::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a12447ce4d11601a625662f9d177cc3d8',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#ac5eeca1e91f0e0d4dd48d432d5213215',1,'cutlass::gemm::HgemmTraitsHelper::SharedLoadStreamB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a54e8ad5874306a3764951a9791f02c96',1,'cutlass::gemm::IgemmTraitsHelper::SharedLoadStreamB()']]], + ['sharedloadtiletraits',['SharedLoadTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#ab8ba28fd1da48fcabbafc0de91281b46',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af534fc5698513af3c6724b68ae03316d',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a1125408805bc697755f2b16594c6c8e1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a118bb34a6f58c3e5a989773b4b597d8c',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a9335aca8b152ff1167763de8ff8fb882',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a458cbcc16fc296d024f2a1a95fb926c1',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#af1bc7f7c26db3399201cd95f35a56790',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a851113bffb5b656c5c649845852b3b8d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedLoadTileTraits()']]], + ['sharedloadtransformerd',['SharedLoadTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a132cabbc1402c87c7b35dea427001a13',1,'cutlass::gemm::GemmEpilogue']]], + 
['sharedstorage',['SharedStorage',['../structcutlass_1_1gemm_1_1Gemm.html#ad10627d508fad0efae1fb91b26d7a6b7',1,'cutlass::gemm::Gemm::SharedStorage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac36dad8a7b6bc7fc6ef88e44068468dc',1,'cutlass::gemm::GemmEpilogue::SharedStorage()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a22c671494d487511c71f2b0f26fdb404',1,'cutlass::gemm::SharedLoadStream::SharedStorage()'],['../structcutlass_1_1TileLoadIterator.html#ab457bd7953af9ef418510f55f52d1f39',1,'cutlass::TileLoadIterator::SharedStorage()'],['../structcutlass_1_1TileStoreIterator.html#ab7922305d47b67e6cfb439e4e8d9f09b',1,'cutlass::TileStoreIterator::SharedStorage()']]], + ['sharedstorefragmentd',['SharedStoreFragmentD',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a5e64440830b36899f9c0ed8b369665c8',1,'cutlass::gemm::IgemmEpilogueTraitsHelper']]], + ['sharedstoreiteratora',['SharedStoreIteratorA',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a3a20852daeb46c625b2391d078b30d73',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#a7f022d423d42d4081cefa7eb26b4d5b4',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorA()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#ae187303a8da63f36960687a4730f4c46',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorA()']]], + ['sharedstoreiteratorb',['SharedStoreIteratorB',['../structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html#a43713f534798b1e27c4ba38b72e63c08',1,'cutlass::gemm::SimplifiedGemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1HgemmTraitsHelper.html#abe3383e7338c08841fd8f0bfb1090448',1,'cutlass::gemm::HgemmTraitsHelper::SharedStoreIteratorB()'],['../structcutlass_1_1gemm_1_1IgemmTraitsHelper.html#a4d6658f3a3b53760b10a3da9c807b81f',1,'cutlass::gemm::IgemmTraitsHelper::SharedStoreIteratorB()']]], + 
['sharedstoreiteratord',['SharedStoreIteratorD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#aab0a964efe223c5c29bc816c393b5a9a',1,'cutlass::gemm::GemmEpilogue::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a74f4beb86447f6b613e9b60234cb27bc',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a02a517fd246fb961727d3bd1b4f954be',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreIteratorD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#af7024128202d642d3535e1ae5cf5f43d',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreIteratorD()']]], + ['sharedstorestorage',['SharedStoreStorage',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a69092e298d5723028fc24235d72f87fa',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['sharedstorestoragea',['SharedStoreStorageA',['../structcutlass_1_1gemm_1_1GemmTraits.html#a8d49ad32fc9d8c14f6141690962c3f9c',1,'cutlass::gemm::GemmTraits']]], + ['sharedstorestorageb',['SharedStoreStorageB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a438b80cd8d8df0e74014ae47a162f7ed',1,'cutlass::gemm::GemmTraits']]], + ['sharedstoretiletraits',['SharedStoreTileTraits',['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#a3a0fb3a914bfd009ff2e3918bcd231a9',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#aaa198fed841af6bf26bf2e9544d0a877',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad6511b7c2d84a9f6c3ed3639269ac44f',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ 
>::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a1884cbc21987aec651fa8149d4ed1a06',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#acbeea56f0ce95ddd632db3482c1021e5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a2aad3b2454d956f20dac1bb0ad75a2f8',1,'cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ab1ae3d51f65f7af60147da1c51a7a0c2',1,'cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#ad7659dc0eaa491447ad127ef7098924f',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a7624585480f83a46725c92b5dee20ebc',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aca6118b5bbe6f667f05c53bd52543045',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits()']]], + 
['sharedstoretransformerd',['SharedStoreTransformerD',['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9063e7fc044a679652d5a3a31aa77e7c',1,'cutlass::gemm::GemmEpilogue::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraits.html#a0b8ac1972b2f2cff48070f8b862ed25c',1,'cutlass::gemm::GemmEpilogueTraits::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html#aa5cea8dbebda9a12a503ae1416c4da33',1,'cutlass::gemm::GemmEpilogueTraitsHelper::SharedStoreTransformerD()'],['../structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html#a00000e0cd14b9e6e242eafb5133af8cf',1,'cutlass::gemm::IgemmEpilogueTraitsHelper::SharedStoreTransformerD()']]], + ['skew',['Skew',['../structcutlass_1_1TileIteratorBase.html#ae89afbcf642b3023770ff22969c51d16',1,'cutlass::TileIteratorBase::Skew()'],['../structcutlass_1_1TileLoadIterator.html#a11ec4297c9a1352c8005ac222892b35c',1,'cutlass::TileLoadIterator::Skew()'],['../structcutlass_1_1TileStoreIterator.html#a57348779bb004ed1ea0fd9cc252e895d',1,'cutlass::TileStoreIterator::Skew()']]], + ['storage',['Storage',['../structcutlass_1_1PredicateVector.html#afe85a07b9f311327c6bf04e3a5f94e5a',1,'cutlass::PredicateVector::Storage()'],['../classcutlass_1_1TensorRef.html#a604921388cb7ee18ddb8127b8ca2f7fd',1,'cutlass::TensorRef::Storage()'],['../structcutlass_1_1TileIteratorBase.html#a6ca47fd6e2f9cbb3498c138417ea414a',1,'cutlass::TileIteratorBase::Storage()']]], + ['storeiterator',['StoreIterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a15eee5bf6367a36a5b5c8024437f4834',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['strides',['Strides',['../structcutlass_1_1FragmentIterator.html#a2858ba9a8a9bbaef1de73415cff9b3c1',1,'cutlass::FragmentIterator']]] +]; diff --git a/docs/generated-html/search/typedefs_e.html b/docs/generated-html/search/typedefs_e.html new file mode 100644 index 0000000000..ccde4cc1d3 --- /dev/null +++ b/docs/generated-html/search/typedefs_e.html @@ -0,0 +1,30 @@ + + + + + + + + 
+ +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_e.js b/docs/generated-html/search/typedefs_e.js new file mode 100644 index 0000000000..529cbd114b --- /dev/null +++ b/docs/generated-html/search/typedefs_e.js @@ -0,0 +1,21 @@ +var searchData= +[ + ['tensorref_5ft',['TensorRef_t',['../classcutlass_1_1TensorView.html#a762fc3d887ab14f4c7bcde85f0af16ab',1,'cutlass::TensorView']]], + ['this_5f',['This_',['../structcutlass_1_1Fragment.html#a32f7ff86b73576a15c5ddaa40c4e0a95',1,'cutlass::Fragment::This_()'],['../structcutlass_1_1FragmentIterator.html#ae320d9672450f5341abcdb24a8b09369',1,'cutlass::FragmentIterator::This_()'],['../structcutlass_1_1FragmentConstIterator.html#add14f695231c2bdd6284bf22b1e66f8f',1,'cutlass::FragmentConstIterator::This_()'],['../structcutlass_1_1gemm_1_1Gemm.html#a26c13e8bbad805760443ef6df475e317',1,'cutlass::gemm::Gemm::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a2892be253a3de5bffc3edcef2890d3a8',1,'cutlass::gemm::GemmGlobalIteratorAb::This_()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6a745d66c4c7de352041f779e54e6b2b',1,'cutlass::gemm::GemmGlobalIteratorCd::This_()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aa8b453116c2d96ea2c56e08cb981346c',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::This_()']]], + ['threadblocktile',['ThreadBlockTile',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a5e43f3c9aa8d7dc5f01dfc63b1ea97dc',1,'cutlass::gemm::GemmMultiplicandTraits']]], + 
['threadoffset',['ThreadOffset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afd09d3b8e5ca04eab7edc2e5723816e5',1,'cutlass::gemm::GemmGlobalIteratorAb::ThreadOffset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a6d985f8e93be21e56f72ec1400d73df1',1,'cutlass::gemm::GemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a667cae4a9fa78a6df073f5ee48ef9664',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::ThreadOffset()'],['../structcutlass_1_1TileTraits.html#af9c0fc178dac7f9dac8d254da34e04dd',1,'cutlass::TileTraits::ThreadOffset()'],['../structcutlass_1_1TileIteratorBase.html#a5abf4755aee07dc58b1d6183fbf4786f',1,'cutlass::TileIteratorBase::ThreadOffset()'],['../structcutlass_1_1TileLoadIterator.html#a8a1527b4b469ae1f97afde2502ece70d',1,'cutlass::TileLoadIterator::ThreadOffset()'],['../structcutlass_1_1TileStoreIterator.html#a6a6f51f459f98c0cddeacf476660cd27',1,'cutlass::TileStoreIterator::ThreadOffset()'],['../structcutlass_1_1TileTraitsStrideMajor.html#ae8d14a3c6871072febfd75ed08aba32c',1,'cutlass::TileTraitsStrideMajor::ThreadOffset()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a823ba83e9ca680da0af7d63be772a351',1,'cutlass::TileTraitsContiguousMajor::ThreadOffset()']]], + ['threads',['Threads',['../structcutlass_1_1gemm_1_1ReshapeThreads.html#afd3614ff45f0fc77ad4967951cb5ab57',1,'cutlass::gemm::ReshapeThreads::Threads()'],['../structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html#a894932ad04fae3aea06eb6d259e01c1c',1,'cutlass::gemm::ReshapeThreads< Tile_, Threads_, true 
>::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a29bd05960cc541bb67098f5483c84cf6',1,'cutlass::gemm::GemmGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a9aff3e2ff0db5a5169257e964e5895c6',1,'cutlass::gemm::GemmGlobalTileCdTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a33e4dcd4449f324fed5ceaa2cde01b50',1,'cutlass::gemm::GemmGlobalIteratorAb::Threads()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#afdd08b4f4c1feaa426f997d15cd28c02',1,'cutlass::gemm::GemmGlobalIteratorCd::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a1acf2a1d8bf73fda142e7d82e05f00a2',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a9bef06b59f27c6e673066a7f0280aa06',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Threads()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#ae7a4f120805421ac0712604723612b7e',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a5fd1a9f132c7aa0f68e129553f519d1e',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::Threads()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#aeb866237318ac7983e554a08395c5125',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Threads()']]], + 
['threadsdelta',['ThreadsDelta',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a65f9ccd630dde0c9db5358cfc951583d',1,'cutlass::gemm::GemmGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#ae2f8331619e735e620f8a8cf2cdde077',1,'cutlass::gemm::GemmGlobalTileCdTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html#a6eee97f03dcea1c441116e143cf58018',1,'cutlass::gemm::HgemmCrosswiseGlobalTileTraits::ThreadsDelta()'],['../structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html#a2bb0f0820e52417ff77e7a2bdb9ed434',1,'cutlass::gemm::IgemmContiguousGlobalTileTraits::ThreadsDelta()']]], + ['threadshape',['ThreadShape',['../structcutlass_1_1TileTraitsStrideMajor.html#a03567f41ce616ebb4cdb309c85820599',1,'cutlass::TileTraitsStrideMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a33116b67e580292d4e354ca17ecd4167',1,'cutlass::TileTraitsContiguousMajor::ThreadShape()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad6619e0b5d876fafd51c78e39f2c029e',1,'cutlass::TileTraitsWarpRake::ThreadShape()']]], + 
['threadsperwarp',['ThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0761c497c41a45652368fc0d54def98f',1,'cutlass::gemm::GemmSharedLoadTileATraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aed92656a074e915d97a1b6a990aeba66',1,'cutlass::gemm::GemmSharedLoadTileBTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#adf72ea773b8d4d3eb184f59c8cdf9543',1,'cutlass::gemm::GemmSharedStoreTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a9022ffc49b32503fd3639341e7e291a3',1,'cutlass::gemm::GemmSharedLoadTileDTraits::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html#aa784f29ff453c1656fdea8270454fa55',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html#a5bc98fd196c1f1e4e3f1bfc621df4f50',1,'cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html#ad2fbba0a70da29af27ed4578577abc5e',1,'cutlass::gemm::ThreadMultiplyAdd::ThreadsPerWarp()']]], + ['threadsstrides',['ThreadsStrides',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae540e7ea7106552682aa4c97b833b3b1',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::ThreadsStrides()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a2053e4b9cb3ed2727c89960354ea0b29',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::ThreadsStrides()']]], + 
['tile',['Tile',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aebbf8834d0d88f0e5b3e1926db5e6758',1,'cutlass::gemm::GemmGlobalTileTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ab96f324083e51ce4c2b73c18803c69a7',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a74196946c28e98ee60346b0eeede1471',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a9a00be672617162c4c7ac94c7d8980cc',1,'cutlass::gemm::GemmSharedLoadTileATraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac242508ec46db0493a69a589dbfc19e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a2bc41b907417b47f3dca9c3dd358f8bc',1,'cutlass::gemm::GemmSharedStoreTileDTraits::Tile()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a63f980fea1ff3dd83ac276cfd83a4ce5',1,'cutlass::gemm::GemmSharedLoadTileDTraits::Tile()'],['../structcutlass_1_1ReshapeTile.html#a8d57fe6422aa920d9815a66e5a85b5f5',1,'cutlass::ReshapeTile::Tile()'],['../structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html#a966a9432cf42dfdff8ad6b89ebd74f06',1,'cutlass::ReshapeTile< Tile_, kAccessSize_, true 
>::Tile()'],['../structcutlass_1_1TileTraits.html#ab831be0adb255eece4f2e12fd9713831',1,'cutlass::TileTraits::Tile()'],['../structcutlass_1_1TileIteratorBase.html#a954ef18acc12d8256a7d4e37683f8c2c',1,'cutlass::TileIteratorBase::Tile()'],['../structcutlass_1_1TileLoadIterator.html#a7f1499ada284c21624487d4d3a5dbd10',1,'cutlass::TileLoadIterator::Tile()'],['../structcutlass_1_1TileStoreIterator.html#a8a87c8ef986e110a01a9226012594a61',1,'cutlass::TileStoreIterator::Tile()'],['../structcutlass_1_1TileTraitsStrideMajor.html#afbb78ece048b868475d4a6802e6894ac',1,'cutlass::TileTraitsStrideMajor::Tile()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a1607d53544302c12278793bc9b283763',1,'cutlass::TileTraitsContiguousMajor::Tile()'],['../structcutlass_1_1TileTraitsWarpRake.html#adcd658d9daf286368a9d51c8c1647f89',1,'cutlass::TileTraitsWarpRake::Tile()'],['../structcutlass_1_1TileTraitsStandard.html#aee3fee526bc4d4820c03665a2f5f166b',1,'cutlass::TileTraitsStandard::Tile()']]], + ['tilewithoutskew',['TileWithoutSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a050cf5964a2d3683491bc4313ead5450',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a5a5a36fc570e1225b20ce0a48c89d213',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a1f35981a6d661635dfbcf7c7a76056a2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew()']]], + ['tilewithoutskew_5f',['TileWithoutSkew_',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a93ae99460695718babaef6d1ef597e38',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithoutSkew_()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a3d8be9ddea1cab53d1b4b3d508f9eab8',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithoutSkew_()']]], + 
['tilewithskew',['TileWithSkew',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a72e0214f86cf8b3711d006dcd69d7a17',1,'cutlass::gemm::GemmSharedLoadTileATraits::TileWithSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a69c7ec2a779718556e6d9119588e791c',1,'cutlass::gemm::GemmSharedLoadTileBTraits::TileWithSkew()']]], + ['traits',['Traits',['../structcutlass_1_1gemm_1_1Gemm.html#a29f52e33e1f1cf150f5062d9ad2590ff',1,'cutlass::gemm::Gemm::Traits()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a645ab6e9e63163ee6bf536717a30fb1b',1,'cutlass::gemm::GemmEpilogue::Traits()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af2b5682b8e6dd13590ec258a44636430',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Traits()'],['../structcutlass_1_1TileIteratorBase.html#ae7add0ee02bbec2c130ebaf608ab0696',1,'cutlass::TileIteratorBase::Traits()'],['../structcutlass_1_1TileLoadIterator.html#a7c6182031d9aa41d0e4a64516723e20a',1,'cutlass::TileLoadIterator::Traits()'],['../structcutlass_1_1TileStoreIterator.html#a6f50a8aec2d7045e9057b93df08172a8',1,'cutlass::TileStoreIterator::Traits()']]], + ['transformedfragment',['TransformedFragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afe7503a3304eefd633581d6bc73a0108',1,'cutlass::gemm::GlobalLoadStreamBase::TransformedFragment()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#aa2227d7fa1edef3f6730c7db41b132b4',1,'cutlass::gemm::SharedLoadStream::TransformedFragment()']]], + ['transformer',['Transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#aa24bd9f94bea04a148b49b2a97b63fbe',1,'cutlass::gemm::GlobalLoadStreamBase::Transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#ad1f70f0dd1027da1353ff7a38f524904',1,'cutlass::gemm::SharedLoadStream::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a882c10bed18f62ece97f5f20f9de3296',1,'cutlass::gemm::HgemmTransformerA< 
MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a42c5bafcb226623b3326dbd01fc72f3b',1,'cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#aaaccb3f02a857e0c80d2891c6c6dcdb7',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#ae66bb2c1f87e19278ff471c32e71ea85',1,'cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html#a98aefa95117dbfdf2e577890318a6c13',1,'cutlass::gemm::IgemmGlobalStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html#a52ecdfd8b94d8d7f4881048e11a33aba',1,'cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html#ad3190650741cef20c1aca919eddd9d72',1,'cutlass::gemm::IgemmGlobalLoadTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html#a49c249026be24ec8a66f5eda99cb855c',1,'cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html#a9edd08d595327a8cc3b8da50622b3bd2',1,'cutlass::gemm::IgemmSharedStoreTransformer::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a0b53e18f109ac0fd116e0d01ed6ec197',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ 
>::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a8a4e3ce1174789e2b695bda7b863079f',1,'cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html#a92320b7224a77a8af61e55beef30ad49',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer()'],['../structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html#a9728f71c2e7a6a649bd28d8c11241b0a',1,'cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer()']]], + ['true_5ftype',['true_type',['../namespacecutlass_1_1platform.html#a0eddc4a3921e137f31fd8014be96e807',1,'cutlass::platform']]], + ['type',['Type',['../structcutlass_1_1StorageType.html#a2b9c99ae52eb4962428f776efc1e7f06',1,'cutlass::StorageType::Type()'],['../structcutlass_1_1StorageType_3_014_01_4.html#aa6754c0eb530544a1457afe1ae94a807',1,'cutlass::StorageType< 4 >::Type()'],['../structcutlass_1_1StorageType_3_012_01_4.html#a66c52fe770774ea01c511aea1af1f8d4',1,'cutlass::StorageType< 2 >::Type()'],['../structcutlass_1_1StorageType_3_011_01_4.html#a4a70002785c378c1f180800f2a65bcd4',1,'cutlass::StorageType< 1 >::Type()'],['../structcutlass_1_1Vectorize.html#a070ec95f4297d769ee53a4d8a650c05e',1,'cutlass::Vectorize::Type()'],['../structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html#a79f147933e3f520145aee94ae18da3c5',1,'cutlass::Vectorize< Element_, 1 
>::Type()'],['../structcutlass_1_1platform_1_1integral__constant.html#af58810ccead8f16ed88cd6a4afdc6e52',1,'cutlass::platform::integral_constant::type()'],['../structcutlass_1_1platform_1_1enable__if.html#aff9c0f270020cf097addf77e53a5af99',1,'cutlass::platform::enable_if::type()'],['../structcutlass_1_1platform_1_1conditional.html#ab6484d0dd6449b5195c4e868026fed11',1,'cutlass::platform::conditional::type()'],['../structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html#a8d55f500f667de560650554e9c220644',1,'cutlass::platform::conditional< false, T, F >::type()'],['../structcutlass_1_1platform_1_1remove__const.html#ac3662947fa50251daf58240a9c798085',1,'cutlass::platform::remove_const::type()'],['../structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html#af68706cfaa6af14edc26ad5b974b47e3',1,'cutlass::platform::remove_const< const T >::type()'],['../structcutlass_1_1platform_1_1remove__volatile.html#a4f5b043d46206248d1bbbcf650707dd1',1,'cutlass::platform::remove_volatile::type()'],['../structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html#aca9bb93efe43106321e4afe0b67542a3',1,'cutlass::platform::remove_volatile< volatile T >::type()'],['../structcutlass_1_1platform_1_1remove__cv.html#a19e5b12cf4eb15ce13d6306735b6de08',1,'cutlass::platform::remove_cv::type()'],['../structcutlass_1_1platform_1_1aligned__storage.html#a9cf0360f335bcd1e9d9e1b266b6dd6c1',1,'cutlass::platform::aligned_storage::type()']]] +]; diff --git a/docs/generated-html/search/typedefs_f.html b/docs/generated-html/search/typedefs_f.html new file mode 100644 index 0000000000..09099791ca --- /dev/null +++ b/docs/generated-html/search/typedefs_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/typedefs_f.js b/docs/generated-html/search/typedefs_f.js new file mode 100644 index 0000000000..1d8d485106 --- /dev/null +++ b/docs/generated-html/search/typedefs_f.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['value_5ftype',['value_type',['../structcutlass_1_1platform_1_1integral__constant.html#ab2ed0b3506818139f1f96639742e79fd',1,'cutlass::platform::integral_constant']]], + ['vector',['Vector',['../structcutlass_1_1VectorTraits.html#a4ac6196c07e0d3ba8a03cd72a05026a2',1,'cutlass::VectorTraits::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a12b9084c48d2d829730f907485dfb5e5',1,'cutlass::VectorTraits< Vector< T, Lanes > >::Vector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#aff21f15596731eacf8c587811bb4ccdb',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::Vector()']]] +]; diff --git a/docs/generated-html/search/variables_0.html b/docs/generated-html/search/variables_0.html new file mode 100644 index 0000000000..51f7bd6bcc --- /dev/null +++ b/docs/generated-html/search/variables_0.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_0.js b/docs/generated-html/search/variables_0.js new file mode 100644 index 0000000000..6dbf197f17 --- /dev/null +++ b/docs/generated-html/search/variables_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['aligned_5f',['aligned_',['../unioncutlass_1_1Vector.html#a9e9352594fcd022526d5b69b6c25c99c',1,'cutlass::Vector::aligned_()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#a9e41dbe541a7dddf1e461e0390fe8896',1,'cutlass::Vector< half, kLanes_ >::aligned_()']]], + ['alpha',['alpha',['../structcutlass_1_1gemm_1_1GemmDesc.html#a053c2b529be527f510ee317737fbf7e8',1,'cutlass::gemm::GemmDesc::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a3248d6b3d9bcc59365d582b879292a70',1,'cutlass::gemm::LinearScaling::Params::alpha()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#ab9c51c8b1f06e935a353ac5b1c22cee6',1,'cutlass::gemm::LinearScaling::alpha()']]] +]; diff --git a/docs/generated-html/search/variables_1.html b/docs/generated-html/search/variables_1.html new file mode 100644 index 0000000000..f46154d8bc --- /dev/null +++ b/docs/generated-html/search/variables_1.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_1.js b/docs/generated-html/search/variables_1.js new file mode 100644 index 0000000000..15bf17b9f1 --- /dev/null +++ b/docs/generated-html/search/variables_1.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['beta',['beta',['../structcutlass_1_1gemm_1_1GemmDesc.html#ab91b702a9932144b388fad3159130332',1,'cutlass::gemm::GemmDesc::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html#a0e455ad2e4eba67259867f9123ca817b',1,'cutlass::gemm::LinearScaling::Params::beta()'],['../structcutlass_1_1gemm_1_1LinearScaling.html#a8af4e58c4988838f2dd0a2172c47e12e',1,'cutlass::gemm::LinearScaling::beta()']]], + ['byte',['byte',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#a86f075f91b80918e968951713430f0b4',1,'cutlass::platform::alignment_of::pad']]] +]; diff --git a/docs/generated-html/search/variables_10.html b/docs/generated-html/search/variables_10.html new file mode 100644 index 0000000000..b62b717e91 --- /dev/null +++ b/docs/generated-html/search/variables_10.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_10.js b/docs/generated-html/search/variables_10.js new file mode 100644 index 0000000000..c577038591 --- /dev/null +++ b/docs/generated-html/search/variables_10.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['val',['val',['../structcutlass_1_1platform_1_1alignment__of_1_1pad.html#abc729cc51d5c90b1d7b0df3092d47cd4',1,'cutlass::platform::alignment_of::pad']]], + ['value',['value',['../structcutlass_1_1platform_1_1integral__constant.html#a9bbaca83ae76941edb9b75b2741d3ad9',1,'cutlass::platform::integral_constant::value()'],['../structcutlass_1_1platform_1_1is__base__of__helper.html#ac7e3ab73057682cc2eb6ed74c33e5eff',1,'cutlass::platform::is_base_of_helper::value()']]] +]; diff --git a/docs/generated-html/search/variables_2.html b/docs/generated-html/search/variables_2.html new file mode 100644 index 0000000000..15275b7a9a --- /dev/null +++ b/docs/generated-html/search/variables_2.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_2.js b/docs/generated-html/search/variables_2.js new file mode 100644 index 0000000000..1f0095000c --- /dev/null +++ b/docs/generated-html/search/variables_2.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['clear',['clear',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a5513254af1f9979b6d0b9f236c3e7325',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage']]], + ['congruous',['Congruous',['../structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html#abe4eb7f9a0ed7d48a81029e88849dcf2',1,'cutlass::gemm::GemmOperandTraitsAb']]] +]; diff --git a/docs/generated-html/search/variables_3.html b/docs/generated-html/search/variables_3.html new file mode 100644 index 0000000000..fbc36712ff --- /dev/null +++ b/docs/generated-html/search/variables_3.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_3.js b/docs/generated-html/search/variables_3.js new file mode 100644 index 0000000000..91abc96bcf --- /dev/null +++ b/docs/generated-html/search/variables_3.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['d_5fa',['d_a',['../structcutlass_1_1gemm_1_1GemmDesc.html#aae63781de41962f496da469684919447',1,'cutlass::gemm::GemmDesc']]], + ['d_5fb',['d_b',['../structcutlass_1_1gemm_1_1GemmDesc.html#a05915032eba39bc9b085bec5ff17257b',1,'cutlass::gemm::GemmDesc']]], + ['d_5fc',['d_c',['../structcutlass_1_1gemm_1_1GemmDesc.html#aa2b3126c082d04fd31521cb0e84cf4d5',1,'cutlass::gemm::GemmDesc']]], + ['d_5fd',['d_d',['../structcutlass_1_1gemm_1_1GemmDesc.html#a30326e2d81c8e154d749f35837903216',1,'cutlass::gemm::GemmDesc']]] +]; diff --git a/docs/generated-html/search/variables_4.html b/docs/generated-html/search/variables_4.html new file mode 100644 index 0000000000..8067e67f8f --- /dev/null +++ b/docs/generated-html/search/variables_4.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_4.js b/docs/generated-html/search/variables_4.js new file mode 100644 index 0000000000..a631b3e77e --- /dev/null +++ b/docs/generated-html/search/variables_4.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['epilogue',['epilogue',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a073430a1e8b124aec8a1f1e00f262bc8',1,'cutlass::gemm::GemmTraits::Params::epilogue()'],['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#afdca9ac1d28e17efaa394f5831a60c04',1,'cutlass::gemm::GemmTraits::SharedStorage::epilogue()']]] +]; diff --git a/docs/generated-html/search/variables_5.html b/docs/generated-html/search/variables_5.html new file mode 100644 index 0000000000..7e95e946f8 --- /dev/null +++ b/docs/generated-html/search/variables_5.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_5.js b/docs/generated-html/search/variables_5.js new file mode 100644 index 0000000000..b10e64d980 --- /dev/null +++ b/docs/generated-html/search/variables_5.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['fetched_5fa',['fetched_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a3147da380e4c1e465aba0b965ac87ab5',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5fb',['fetched_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a837fbec1d47ae45480941de6290889c0',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['fetched_5ffragment',['fetched_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a26aa580a2697ad02c27f868e7779348d',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['functor',['functor',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#afa888d993b86ed88950a9e5ab7edeb06',1,'cutlass::gemm::GemmEpilogueTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_6.html b/docs/generated-html/search/variables_6.html new file mode 100644 index 0000000000..3d398e6286 --- /dev/null +++ b/docs/generated-html/search/variables_6.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_6.js b/docs/generated-html/search/variables_6.js new file mode 100644 index 0000000000..859d50bf7c --- /dev/null +++ b/docs/generated-html/search/variables_6.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['global',['global',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#a3c2980547310ec4307f3a5f9817dfc51',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['global_5fstream_5fa',['global_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a575bcff901d69ae3f46987222f23ab64',1,'cutlass::gemm::GemmTraits::Params']]], + ['global_5fstream_5fb',['global_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a46affe35cb16874de5a2b9777aedf596',1,'cutlass::gemm::GemmTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_7.html b/docs/generated-html/search/variables_7.html new file mode 100644 index 0000000000..7b791460ae --- /dev/null +++ b/docs/generated-html/search/variables_7.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_7.js b/docs/generated-html/search/variables_7.js new file mode 100644 index 0000000000..504309120a --- /dev/null +++ b/docs/generated-html/search/variables_7.js @@ -0,0 +1,12 @@ +var searchData= +[ + ['idx',['idx',['../structcutlass_1_1Coord.html#a50de265129f1db7bdf2f0aefbc6a46bc',1,'cutlass::Coord']]], + ['inc_5fadvance',['inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a8c2618ac16362a8362dcddeed71c41d4',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a857db0c999250248b104f17f13fe9bd8',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_advance()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a1187258cd4068a627e73bee0302f1fc2',1,'cutlass::TileIteratorBase::Params::inc_advance()']]], + ['inc_5fd',['inc_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#af95fa1b5102176a0fa9b17713fd48150',1,'cutlass::TileIteratorBase::Params']]], + ['inc_5fh',['inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#aed94505e5a269d5f33499e71284104f5',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a6306f771718c0c05276e103f30f862b2',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::inc_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#aea591d4278a8338ae8b50fa0b8f3a366',1,'cutlass::TileIteratorBase::Params::inc_h()']]], + ['inc_5fw',['inc_w',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ac6e81450a2d78555a6c2415dcc42b178',1,'cutlass::TileIteratorBase::Params']]], + ['isvector',['IsVector',['../structcutlass_1_1VectorTraits.html#abf96ea5dfd3212d388cb91e48cc0e6a2',1,'cutlass::VectorTraits::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aead181209c756f25ab5870682670bb99',1,'cutlass::VectorTraits< Vector< T, Lanes > 
>::IsVector()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a893488718d8437970c1b4ed4f4056620',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector()']]], + ['iterator',['iterator',['../structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html#ae59f871c06a0ac7b9224f0de923082d7',1,'cutlass::gemm::SharedLoadStream::Params::iterator()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#a54481a42d4125e3693a086269d9a7b10',1,'cutlass::gemm::SharedLoadStream::iterator()']]], + ['iterator_5fc',['iterator_c',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a7350ceefcd09a9e3662ca30b780cc2ce',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['iterator_5fd',['iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a987c179a7e73c2572fe8aef3255668f7',1,'cutlass::gemm::GemmEpilogueTraits::Params']]] +]; diff --git a/docs/generated-html/search/variables_8.html b/docs/generated-html/search/variables_8.html new file mode 100644 index 0000000000..8ebc5f6b77 --- /dev/null +++ b/docs/generated-html/search/variables_8.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_8.js b/docs/generated-html/search/variables_8.js new file mode 100644 index 0000000000..7406b11917 --- /dev/null +++ b/docs/generated-html/search/variables_8.js @@ -0,0 +1,64 @@ +var searchData= +[ + ['k',['k',['../structcutlass_1_1gemm_1_1GemmDesc.html#ac789a7e5d2db65d006f1e8e3df542a6f',1,'cutlass::gemm::GemmDesc::k()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aae3a008b39f9678a03192f6ff54152d8',1,'cutlass::gemm::GemmTraits::Params::k()']]], + ['kaccesssize',['kAccessSize',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#aa001e09b246fdd8259cbda6a500cad5f',1,'cutlass::gemm::GemmGlobalTileTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ae852c89da0455025c0c41af258e47047',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#a846e6d8d06be0ba6fa41b1431c8ec061',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a0a33d4289ed45e988d560b5f73ac997e',1,'cutlass::gemm::GemmSharedLoadTileATraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#aa41cc5dc82fe08457d103545f8f63081',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a9521c4017e227b2511891a7fb18513e1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kAccessSize()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8d308d593b59624abe3e228d588be61d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kAccessSize()'],['../structcutlass_1_1TileIteratorBase.html#aef07ba456ea016092d7d2446751b76a3',1,'cutlass::TileIteratorBase::kAccessSize()']]], + ['kaccumulatorsperldsa',['kAccumulatorsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#abbdd356f280099269867e614684645cf',1,'cutlass::gemm::GemmConfig']]], + 
['kaccumulatorsperldsb',['kAccumulatorsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a9dd092bca2f1f2c039f367b23bafa9c1',1,'cutlass::gemm::GemmConfig']]], + ['kadvance',['kAdvance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a8c1e871f17685b16a7a41fcc888f0125',1,'cutlass::gemm::GemmGlobalIteratorAb::kAdvance()'],['../structcutlass_1_1TileIteratorBase.html#ac1a64e974dcd69c3a86a31db6cbff421',1,'cutlass::TileIteratorBase::kAdvance()'],['../structcutlass_1_1TileLoadIterator.html#a69d2f21c8188fb3229af8c2dbe0a23b6',1,'cutlass::TileLoadIterator::kAdvance()'],['../structcutlass_1_1TileStoreIterator.html#a8059c57030df99b73309e9210ec5f624',1,'cutlass::TileStoreIterator::kAdvance()']]], + ['kbytes',['kBytes',['../structcutlass_1_1PredicateVector.html#ab870e074b33c598f69fe11e104615c5a',1,'cutlass::PredicateVector']]], + ['kc',['kC',['../structcutlass_1_1Shape.html#a3f2433fd6401dd28f1130499f9fd340c',1,'cutlass::Shape']]], + ['kcount',['kCount',['../structcutlass_1_1ShapeCount.html#a8d25b48b3294b5563f89c62a6e6d00e5',1,'cutlass::ShapeCount']]], + ['kd',['kD',['../structcutlass_1_1Shape.html#a19086a5567d6c710ec853e35a7f29c25',1,'cutlass::Shape']]], + ['kdhw',['kDhw',['../structcutlass_1_1ShapeCount.html#af7d7ccd42de2c49fe57f03cf0e657fe8',1,'cutlass::ShapeCount']]], + ['kdhwc',['kDhwc',['../structcutlass_1_1ShapeCount.html#a5a274564d6b8607a0be621b2664fba18',1,'cutlass::ShapeCount']]], + ['kelements',['kElements',['../structcutlass_1_1Fragment.html#a2b9a64391d00ef23dd8d456c2337fa60',1,'cutlass::Fragment']]], + ['kelementsperaccess',['kElementsPerAccess',['../structcutlass_1_1FragmentIterator.html#ad2c43e30e78e8799df7cb02ac08cee9a',1,'cutlass::FragmentIterator::kElementsPerAccess()'],['../structcutlass_1_1FragmentConstIterator.html#a004fabc9caa6924f3fb4badcbb19e88f',1,'cutlass::FragmentConstIterator::kElementsPerAccess()']]], + 
['kextent',['kExtent',['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html#a881f84951bc9e47ab2be9ef3f2c1e423',1,'cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent()'],['../structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html#a82ff9b447e4a58164b5f7d53d2602930',1,'cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent()']]], + ['kfragmentsize',['kFragmentSize',['../structcutlass_1_1TileIteratorBase.html#a4e0b2bc06bb8f52313e4d8c51ab30ff2',1,'cutlass::TileIteratorBase']]], + ['kh',['kH',['../structcutlass_1_1Shape.html#a3a20d9062bba613c160bb2cd14f80a5e',1,'cutlass::Shape']]], + ['khw',['kHw',['../structcutlass_1_1ShapeCount.html#afc957be69eb78e4849ba8ab3cc66583f',1,'cutlass::ShapeCount']]], + ['khwc',['kHwc',['../structcutlass_1_1ShapeCount.html#a75324e2c9d31a0787343fc994586b742',1,'cutlass::ShapeCount']]], + ['kint8output',['kInt8Output',['../structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html#a8609af98d1e43cd25688bae6f33feed4',1,'cutlass::gemm::IgemmEpilogueTraits']]], + ['kiterationsd',['kIterationsD',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8663311646210b690bb0c2a1012e82f0',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsh',['kIterationsH',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a3b1a461c1dfbcd3817ab2d57bd0da9f1',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + ['kiterationsinhperwarp',['kIterationsInHPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a4b8d66df02ba1653aa6d1f23b967f237',1,'cutlass::gemm::GemmSharedLoadTileDTraits']]], + 
['kiteratorfragment',['kIteratorFragment',['../structcutlass_1_1TileIteratorBase.html#a38c8ec1e9d0117172981b4c7dd4bf3be',1,'cutlass::TileIteratorBase::kIteratorFragment()'],['../structcutlass_1_1TileLoadIterator.html#aba1d75a0cd5f11dee2aecf89b2b13d98',1,'cutlass::TileLoadIterator::kIteratorFragment()'],['../structcutlass_1_1TileStoreIterator.html#a94c0567316118abfb84fc28560a5a46a',1,'cutlass::TileStoreIterator::kIteratorFragment()']]], + ['kkstrided',['kKstrided',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a1984c9ef6abfd029acbc3f702593ab85',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['klanes',['kLanes',['../structcutlass_1_1VectorTraits.html#a052e1e5963a9e04482b16cb881d1eaf8',1,'cutlass::VectorTraits::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#aca745b59c6c21292f119943e5a480f39',1,'cutlass::VectorTraits< Vector< T, Lanes > >::kLanes()'],['../structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a43ac200035052a2c352c8c4b84aac73c',1,'cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes()']]], + 
['klayout',['kLayout',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a807cffc6f69f8d30a2fc94cf49fb904c',1,'cutlass::gemm::GlobalLoadStreamBase::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#a74bc07cb021a73513ab2fbacd572be90',1,'cutlass::gemm::GemmGlobalTileTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#afe016e0c6234075a8d69ba7341555ece',1,'cutlass::gemm::GemmGlobalIteratorAb::kLayout()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a27b88818f5b094372bf2c6e090c9148a',1,'cutlass::gemm::GemmGlobalIteratorCd::kLayout()'],['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a19076e58e60d296da74cf504e2a473fd',1,'cutlass::gemm::GemmMultiplicandTraits::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ad2010686bceb21aec9a1924ae379edc1',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#aedd49525e2c849baecf88cdfd9e3515c',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#afbd350793888a7e7b299548dca854c13',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a31fa28168811e2d04fbd74029df785ab',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ae0f176733ba9dee0cce45435ac5d53ba',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::kLayout()']]], + ['klayouta',['kLayoutA',['../structcutlass_1_1gemm_1_1GemmTraits.html#ac5bb5931a707ed7672f69267753ba41b',1,'cutlass::gemm::GemmTraits']]], 
+ ['klayoutb',['kLayoutB',['../structcutlass_1_1gemm_1_1GemmTraits.html#a078e8d9cfa1b182e1b96a2cc8c54b684',1,'cutlass::gemm::GemmTraits']]], + ['kmemoryspace',['kMemorySpace',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#af219ece6e66e2866169e06e15cc4472d',1,'cutlass::gemm::GemmGlobalTileTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#a59c981aa720f983b846bed7c3e4a7cab',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#ae5a07814b9cfe9a64f69bac0f0772f20',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4456e4c8048bfb378e5b80833a0d19e5',1,'cutlass::gemm::GemmSharedLoadTileATraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a7007093a4abf79a0b4bfb3fc85a02620',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a8914bc5154f21fa5fd182b0009c44c39',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kMemorySpace()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#afb4687520eff9c6a21c35a5e04f69de8',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kMemorySpace()'],['../structcutlass_1_1TileIteratorBase.html#a871c9b82109eab432c5a1d465643bf97',1,'cutlass::TileIteratorBase::kMemorySpace()'],['../structcutlass_1_1TileLoadIterator.html#ac21bd78b31c99c826f0eddb5aa033bf1',1,'cutlass::TileLoadIterator::kMemorySpace()'],['../structcutlass_1_1TileStoreIterator.html#adaebec9eacf767f63f048033de73ea5b',1,'cutlass::TileStoreIterator::kMemorySpace()']]], + 
['koperand',['kOperand',['../structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html#ae0bca976b7cfba8561db4cccc16e99e1',1,'cutlass::gemm::GemmGlobalTileTraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af511bba9fc2125516eb1442b1c88d851',1,'cutlass::gemm::GemmSharedLoadTileATraits::kOperand()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#afd4881aae69c8041d3931982d85f44e4',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kOperand()']]], + ['kpredicates',['kPredicates',['../structcutlass_1_1PredicateVector.html#afff3a2142d9853606d6ad7c3a459f492',1,'cutlass::PredicateVector']]], + ['kpredicatesperbyte',['kPredicatesPerByte',['../structcutlass_1_1PredicateVector.html#a1387c4a964f971ed4611d750a09ec0b5',1,'cutlass::PredicateVector']]], + ['kpredicatestart',['kPredicateStart',['../structcutlass_1_1PredicateVector.html#acf848dce84c01453ab8a2d00c8d4f86e',1,'cutlass::PredicateVector']]], + ['kscalarsin4b',['kScalarsIn4B',['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#ad77b9084720ad7378e033e54bfb74ce7',1,'cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B()'],['../structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#a774a052f0f98f50e46dda933c81badd5',1,'cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B()']]], + ['kscalarsperldga',['kScalarsPerLdgA',['../structcutlass_1_1gemm_1_1GemmConfig.html#a2e0a043c5d4d7959ec1a2214c3ac39ac',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgb',['kScalarsPerLdgB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a849b21fed39aaac1cdd546334739be97',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldgc',['kScalarsPerLdgC',['../structcutlass_1_1gemm_1_1GemmConfig.html#aad47c635a73e83bd4b19494864832d31',1,'cutlass::gemm::GemmConfig']]], + 
['kscalarsperldsa',['kScalarsPerLdsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#aa1b75484138923a52b32888fef608d9b',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsb',['kScalarsPerLdsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#a86470d3a44e2b50ee31ec3c9f79927ef',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperldsd',['kScalarsPerLdsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#adaf2ee5b8e6f7bdb9939cd45a186ca56',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperrow',['kScalarsPerRow',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#af1c981ec89a9cabaf5d34231d51a029c',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerRow()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#aa3e378cabce9ed7f199c179c15a12ca4',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerRow()']]], + ['kscalarsperstgd',['kScalarsPerStgD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3633083f4f778215543e376c092745d7',1,'cutlass::gemm::GemmConfig']]], + ['kscalarsperstsa',['kScalarsPerStsA',['../structcutlass_1_1gemm_1_1GemmConfig.html#accc95abc55880abdab92253367b4b186',1,'cutlass::gemm::GemmConfig::kScalarsPerStsA()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html#ae396f7301f934c179e054f68f0420edf',1,'cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA()']]], + ['kscalarsperstsb',['kScalarsPerStsB',['../structcutlass_1_1gemm_1_1GemmConfig.html#ac0c8c027e3ede14b62d7c7d519551f21',1,'cutlass::gemm::GemmConfig::kScalarsPerStsB()'],['../structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html#a47d99d98c783cf1d317698bd465ffa9a',1,'cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB()']]], + ['kscalarsperstsd',['kScalarsPerStsD',['../structcutlass_1_1gemm_1_1GemmConfig.html#a3087cdd38e2c65ad0dffdd0587d2cce0',1,'cutlass::gemm::GemmConfig']]], + 
['kscalarsperthread',['kScalarsPerThread',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#ae0b53d76096f9d34df6e16280565c7b1',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kScalarsPerThread()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#abb5fdb164b09c8f74f92278f3d68b95f',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kScalarsPerThread()']]], + ['kskew',['kSkew',['../structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html#ace14ca9ad11e2cdafcd4a4b63c0df591',1,'cutlass::gemm::GemmSharedStoreTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html#aba6decf87d770becaadd610d9fc27491',1,'cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#aaffe67e519e919bf561142e05da6e6c8',1,'cutlass::gemm::GemmSharedLoadTileATraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#ac9cd90ecd02809060a2fe6e2da4210f9',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a48baee6541e6359753f1bae5bd864029',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kSkew()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a7e9ce187e12575f0ecd39b2bfe13dddf',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kSkew()']]], + ['kstages',['kStages',['../structcutlass_1_1gemm_1_1GemmConfig.html#a221949c289057e39d439ce03a5b01c52',1,'cutlass::gemm::GemmConfig']]], + ['kstrideh',['kStrideH',['../structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html#a87918f4d67a9c1e19dcd3c6bfc243e97',1,'cutlass::gemm::GemmGlobalTileCdTraits']]], + 
['kthreads',['kThreads',['../structcutlass_1_1gemm_1_1Gemm.html#a41239809be4ebc730dd8ff28c9efc58b',1,'cutlass::gemm::Gemm::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html#a05039ba8b7d9890903064b1a834dcd3e',1,'cutlass::gemm::GemmSharedStoreTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html#a8325bc9d56155ecb6f2ddbd56f4ed23d',1,'cutlass::gemm::GemmSharedLoadTileDTraits::kThreads()'],['../structcutlass_1_1gemm_1_1GemmConfig.html#a0b2be601de08848afc4418adb97255bf',1,'cutlass::gemm::GemmConfig::kThreads()'],['../structcutlass_1_1TileTraitsStrideMajor.html#a2b6ad449269a178018f02b8cc64ddb85',1,'cutlass::TileTraitsStrideMajor::kThreads()'],['../structcutlass_1_1TileTraitsContiguousMajor.html#a53d10552356855bf7379632e72bbe0c9',1,'cutlass::TileTraitsContiguousMajor::kThreads()'],['../structcutlass_1_1TileTraitsWarpRake.html#a11d943e15e397cbc5233b09071dff642',1,'cutlass::TileTraitsWarpRake::kThreads()'],['../structcutlass_1_1TileTraitsStandard.html#a9cbcbe09aa6e9465b63dd22d59435af1',1,'cutlass::TileTraitsStandard::kThreads()']]], + ['kthreadsperwarp',['kThreadsPerWarp',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#a4246185b8279f245ef5d0650c1eec14f',1,'cutlass::gemm::GemmSharedLoadTileATraits::kThreadsPerWarp()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a049b0bcdf8c5318ee84edeb1e42eaf78',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kThreadsPerWarp()']]], + ['kusage',['kUsage',['../structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html#a962ffde3b3db78792b67dd1f57ab0a05',1,'cutlass::gemm::GemmMultiplicandTraits']]], + ['kvalue',['kValue',['../structcutlass_1_1Extent.html#a2cb62986b9a7c168bf79b083f33c4bad',1,'cutlass::Extent::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html#a10f7184a9a50de0268efa45dab5dc304',1,'cutlass::Extent< Vector< T, Lanes > 
>::kValue()'],['../structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html#a87917a6dfbb1662416c4ea4831669aaf',1,'cutlass::Extent< Vector< T, Lanes > const >::kValue()']]], + ['kw',['kW',['../structcutlass_1_1Shape.html#a78836a20250ff24c25a6622ad818b421',1,'cutlass::Shape']]], + ['kwarpcount',['kWarpCount',['../structcutlass_1_1TileTraitsWarpRake.html#a7a03abe44862077351b0a0a2818d214d',1,'cutlass::TileTraitsWarpRake::kWarpCount()'],['../structcutlass_1_1TileTraitsStandard.html#a1e8f90991e179d13971b84494c989d25',1,'cutlass::TileTraitsStandard::kWarpCount()']]], + ['kwarps',['kWarps',['../structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html#af78a275086a297bd93aed920f57a17be',1,'cutlass::gemm::GemmSharedLoadTileATraits::kWarps()'],['../structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html#a8b8d6a26a29d5477f526d9ce8c27e3e2',1,'cutlass::gemm::GemmSharedLoadTileBTraits::kWarps()']]], + ['kwarpscontiguous',['kWarpsContiguous',['../structcutlass_1_1TileTraitsWarpRake.html#aede0832e95df911b1e6e3f1cc9e593ce',1,'cutlass::TileTraitsWarpRake']]], + ['kwarpsize',['kWarpSize',['../structcutlass_1_1gemm_1_1GemmConfig.html#a677d6a1711cc756b817095b7437cce0e',1,'cutlass::gemm::GemmConfig::kWarpSize()'],['../structcutlass_1_1TileTraitsWarpRake.html#ad25fb7c1b5dc8c5828a69e5a468f490b',1,'cutlass::TileTraitsWarpRake::kWarpSize()'],['../structcutlass_1_1TileTraitsStandard.html#ae9f40eb177c440f01adcc2fe9ca7ec10',1,'cutlass::TileTraitsStandard::kWarpSize()']]], + ['kwarpsstrided',['kWarpsStrided',['../structcutlass_1_1TileTraitsWarpRake.html#a8b1d3fe590f426ce11d597bb98c51bd4',1,'cutlass::TileTraitsWarpRake']]], + ['kwc',['kWc',['../structcutlass_1_1ShapeCount.html#aac5c49469aa80d119c2006291b431276',1,'cutlass::ShapeCount']]], + ['kwordcount',['kWordCount',['../structcutlass_1_1PredicateVector.html#a734bbfaf3829f73ef0b44fa7db4ccd42',1,'cutlass::PredicateVector']]] +]; diff --git a/docs/generated-html/search/variables_9.html b/docs/generated-html/search/variables_9.html 
new file mode 100644 index 0000000000..12136613ec --- /dev/null +++ b/docs/generated-html/search/variables_9.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_9.js b/docs/generated-html/search/variables_9.js new file mode 100644 index 0000000000..d7ebf99aab --- /dev/null +++ b/docs/generated-html/search/variables_9.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['lda',['lda',['../structcutlass_1_1gemm_1_1GemmDesc.html#a62ad30ba419ccb661e6700da98221789',1,'cutlass::gemm::GemmDesc']]], + ['ldb',['ldb',['../structcutlass_1_1gemm_1_1GemmDesc.html#a7591ce0223b0d05c4d6fca6c67b98bfe',1,'cutlass::gemm::GemmDesc']]], + ['ldc',['ldc',['../structcutlass_1_1gemm_1_1GemmDesc.html#a0f492560cabc45cd492da65b819d09db',1,'cutlass::gemm::GemmDesc']]], + ['ldd',['ldd',['../structcutlass_1_1gemm_1_1GemmDesc.html#a3280e5c5484f5c10d1412bcb70eb77e9',1,'cutlass::gemm::GemmDesc']]], + ['load',['load',['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#aea5ed35a44624684ffa9ada9d09a8893',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage']]], + ['load_5fiterator',['load_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a42ffcba6af2b5ddfb1f4825a34d43532',1,'cutlass::gemm::GlobalLoadStreamBase::Params::load_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a3be938f8661f9cd10966866b7b80b471',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::load_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#ad2381f2311ee8400a2dc57c19084ef5e',1,'cutlass::gemm::GlobalLoadStreamBase::load_iterator()']]] +]; diff --git a/docs/generated-html/search/variables_a.html b/docs/generated-html/search/variables_a.html new file mode 100644 index 0000000000..24819a377b --- /dev/null +++ b/docs/generated-html/search/variables_a.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_a.js b/docs/generated-html/search/variables_a.js new file mode 100644 index 0000000000..131fefbd75 --- /dev/null +++ b/docs/generated-html/search/variables_a.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['m',['m',['../structcutlass_1_1gemm_1_1GemmDesc.html#a5c2b3e75cb6873762ba3f85487b78579',1,'cutlass::gemm::GemmDesc::m()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#ac344bf5ca318dc343bd6fa6bf52d2e22',1,'cutlass::gemm::GemmEpilogue::m()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aaf27c0f2f4ab730ed5c865e9f7d2373b',1,'cutlass::gemm::GemmTraits::Params::m()']]], + ['main_5floop',['main_loop',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html#aa5dd7edc3cffa785eb1e5b62c18c74c4',1,'cutlass::gemm::GemmTraits::SharedStorage']]] +]; diff --git a/docs/generated-html/search/variables_b.html b/docs/generated-html/search/variables_b.html new file mode 100644 index 0000000000..b306931e2e --- /dev/null +++ b/docs/generated-html/search/variables_b.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_b.js b/docs/generated-html/search/variables_b.js new file mode 100644 index 0000000000..97f3b3b158 --- /dev/null +++ b/docs/generated-html/search/variables_b.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['n',['N',['../structcutlass_1_1Coord.html#a3f2f5a9d7ef2063456c4d9f7e57e71ca',1,'cutlass::Coord::N()'],['../structcutlass_1_1gemm_1_1GemmDesc.html#acee9727aa6cb612a25cd6ced4829061a',1,'cutlass::gemm::GemmDesc::n()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a9cc371cd2f1a9485583afdacbb7403ea',1,'cutlass::gemm::GemmEpilogue::n()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a437d4b6f1f149849c5ae635a5993e7ac',1,'cutlass::gemm::GemmTraits::Params::n()']]] +]; diff --git a/docs/generated-html/search/variables_c.html b/docs/generated-html/search/variables_c.html new file mode 100644 index 0000000000..75709df8f5 --- /dev/null +++ b/docs/generated-html/search/variables_c.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_c.js b/docs/generated-html/search/variables_c.js new file mode 100644 index 0000000000..4774d499d1 --- /dev/null +++ b/docs/generated-html/search/variables_c.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['params',['params',['../structcutlass_1_1gemm_1_1Gemm.html#a3c292637ab0ec8e73856d0cf6efb6da2',1,'cutlass::gemm::Gemm::params()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a81b028a18df51d3caa1b0ba0c990e362',1,'cutlass::gemm::GemmEpilogue::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#ab8c79cb1a8157dd00429c93cb4a41322',1,'cutlass::gemm::GemmGlobalIteratorAb::params()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ac368b1ea1c5ad2209a6ac6bec597600f',1,'cutlass::gemm::GemmGlobalIteratorCd::params()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#a0ad4218ad2c10641379b236473e79e84',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::params()'],['../structcutlass_1_1TileLoadIterator.html#aaafe35622751532971c1b7efc54c888b',1,'cutlass::TileLoadIterator::params()'],['../structcutlass_1_1TileStoreIterator.html#a5e6c00b99e0f752137b07f7059f6ee0f',1,'cutlass::TileStoreIterator::params()']]], + 
['pointer',['pointer',['../structcutlass_1_1FragmentIterator.html#af667793926cdb24d701eb75e0345bbd6',1,'cutlass::FragmentIterator::pointer()'],['../structcutlass_1_1FragmentConstIterator.html#aee37f8ea06127b94a304bb776945509b',1,'cutlass::FragmentConstIterator::pointer()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#ad764f98e770d4685006e6888214dcd4d',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa42c4e7419308926b925909e6a5c719d',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::pointer()'],['../structcutlass_1_1TileLoadIterator_1_1Params.html#a6608f7027994aaebdefd004fe94153d9',1,'cutlass::TileLoadIterator::Params::pointer()'],['../structcutlass_1_1TileStoreIterator_1_1Params.html#a6bbadae6b13aef8f31a77cacd88b068b',1,'cutlass::TileStoreIterator::Params::pointer()']]], + ['predicate_5finc_5fadvance',['predicate_inc_advance',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a2b5d2b02d241e89677c41eb658ace129',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_advance()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#aa0367d016549cce6bd896bae364fc248',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_advance()']]], + ['predicate_5finc_5fh',['predicate_inc_h',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a36afe18f94aacd0746c8946866371d3c',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_inc_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5b8177a936ba30a3d68ca238aaf76ff6',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_inc_h()']]], + 
['predicate_5foffset',['predicate_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a3e8f6cf08d23318f3e3263b55cf3b84a',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::predicate_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a38f13119cf3111e84914f1bef6f5d985',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::predicate_offset()']]], + ['predicates',['predicates',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#af323c9db74f0de3376edd35eb377bc9c',1,'cutlass::gemm::GemmGlobalIteratorAb::predicates()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#ad23e6224e37ec1d13dc237ce8ec6e977',1,'cutlass::gemm::GemmGlobalIteratorCd::predicates()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#af3c9d62554b1d311d82ba89e09cdd3fa',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::predicates()']]] +]; diff --git a/docs/generated-html/search/variables_d.html b/docs/generated-html/search/variables_d.html new file mode 100644 index 0000000000..34c80a4863 --- /dev/null +++ b/docs/generated-html/search/variables_d.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_d.js b/docs/generated-html/search/variables_d.js new file mode 100644 index 0000000000..c72e153966 --- /dev/null +++ b/docs/generated-html/search/variables_d.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['rank',['Rank',['../classcutlass_1_1TensorRef.html#a22ac53a60e63a743613e732586ad0c66',1,'cutlass::TensorRef::Rank()'],['../classcutlass_1_1TensorView.html#a22c39e8cf314884c5d523914cf4cac90',1,'cutlass::TensorView::Rank()']]], + ['registers',['registers',['../unioncutlass_1_1Vector.html#a29dab07949206cc1609543ffcefd1e5a',1,'cutlass::Vector::registers()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#abd116dc7a5b82ac9b1481fb1d2bfc93f',1,'cutlass::Vector< half, kLanes_ >::registers()']]] +]; diff --git a/docs/generated-html/search/variables_e.html b/docs/generated-html/search/variables_e.html new file mode 100644 index 0000000000..4a1c8a614c --- /dev/null +++ b/docs/generated-html/search/variables_e.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_e.js b/docs/generated-html/search/variables_e.js new file mode 100644 index 0000000000..0f47e54982 --- /dev/null +++ b/docs/generated-html/search/variables_e.js @@ -0,0 +1,19 @@ +var searchData= +[ + ['scalars',['scalars',['../unioncutlass_1_1Vector.html#a091080b4e9db9e89734f44ceb985d78f',1,'cutlass::Vector::scalars()'],['../unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html#ab4a119a4813f80aa10c25e32f8b115f3',1,'cutlass::Vector< half, kLanes_ >::scalars()']]], + ['shared',['shared',['../unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html#afabd328b106d45b156200f73942d211e',1,'cutlass::gemm::GemmTraits::StreamSharedStorage']]], + ['shared_5fload_5fiterator_5fd',['shared_load_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a1742e43c128665f0ca39cb578291df81',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstorage',['shared_storage',['../structcutlass_1_1gemm_1_1Gemm.html#a6b0119ed8d92698dab4de68987c8cc1b',1,'cutlass::gemm::Gemm::shared_storage()'],['../structcutlass_1_1gemm_1_1GemmEpilogue.html#a442b5b5688cd658c3b3476650c00281e',1,'cutlass::gemm::GemmEpilogue::shared_storage()']]], + ['shared_5fstore_5fiterator_5fd',['shared_store_iterator_d',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#af79a0c74a4c30ccec59b393721b5dfc1',1,'cutlass::gemm::GemmEpilogueTraits::Params']]], + ['shared_5fstream',['shared_stream',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html#ae63b5a52106dbd37ea304196335ec210',1,'cutlass::gemm::GemmEpilogueTraits::SharedStorage']]], + ['shared_5fstream_5fa',['shared_stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#aa9937ec51d18aad02398d95095117978',1,'cutlass::gemm::GemmTraits::Params']]], + ['shared_5fstream_5fb',['shared_stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html#a78f22007632937bbd5f3dab7b097477d',1,'cutlass::gemm::GemmTraits::Params']]], + 
['stage',['stage',['../structcutlass_1_1TileLoadIterator.html#aa3fd9859de68d76e07ebee06c6ccee92',1,'cutlass::TileLoadIterator::stage()'],['../structcutlass_1_1TileStoreIterator.html#ae435b72b15eca46eb871446d92bd316e',1,'cutlass::TileStoreIterator::stage()']]], + ['store',['store',['../unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html#a1f31090613c4e6f0895f598880d6c4e5',1,'cutlass::gemm::GemmEpilogueTraits::StreamSharedStorage']]], + ['store_5fiterator',['store_iterator',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html#a3e5167fa3f2dc0d8b4b903bd4e936969',1,'cutlass::gemm::GlobalLoadStreamBase::Params::store_iterator()'],['../unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html#a939e9ddecc5ee97882a54211a61f5586',1,'cutlass::gemm::GlobalLoadStreamBase::SharedStorage::store_iterator()'],['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a0eafd1e245946bd1b9d228ad7d2d0dae',1,'cutlass::gemm::GlobalLoadStreamBase::store_iterator()']]], + ['stream_5fa',['stream_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a62d3dcf5d97a0a896b2033e55dfb0811',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#a82a59524b5d3134eb609d280193a5c47',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_a()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a8e68561561ac6b08efbfd116903198c8',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_a()']]], + 
['stream_5fb',['stream_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html#a0173fcc8856b17a52cc5eee845f101fa',1,'cutlass::gemm::GemmTraits::MainLoopSharedStorage::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html#acc287ce5e2f3635d9d55d91914d2d04c',1,'cutlass::gemm::GemmTraits::GlobalLoadStream::stream_b()'],['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a1fdc6af44c14c88a94529d187fda176d',1,'cutlass::gemm::GemmTraits::SharedLoadStream::stream_b()']]], + ['stride_5fd',['stride_d',['../structcutlass_1_1TileIteratorBase_1_1Params.html#ad67234ec264354a22032bb2519575dc1',1,'cutlass::TileIteratorBase::Params']]], + ['stride_5fh',['stride_h',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#ae0fdc7426b22ff2c20f077e251ebc823',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_h()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html#a0c6b03c635e14ad4424a83f8c7f8025e',1,'cutlass::gemm::GemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html#a5cff0436eed0fefa2957ad6d083ed007',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::Params::stride_h()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a58e8c883aea4cfdfa5a84c25a4704ebc',1,'cutlass::TileIteratorBase::Params::stride_h()']]], + ['stride_5fw',['stride_w',['../structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html#a565f6cab8925d632dcf24bd1974caca2',1,'cutlass::gemm::GemmEpilogueTraits::Params::stride_w()'],['../structcutlass_1_1TileIteratorBase_1_1Params.html#a313984457c78eea66c980f6813047b9c',1,'cutlass::TileIteratorBase::Params::stride_w()']]] +]; diff --git a/docs/generated-html/search/variables_f.html b/docs/generated-html/search/variables_f.html new file mode 100644 index 0000000000..cc86fb590c --- /dev/null +++ b/docs/generated-html/search/variables_f.html @@ -0,0 +1,30 @@ + + + + + + + + + +
    +
    Loading...
    +
    + +
    Searching...
    +
    No Matches
    + +
    + + diff --git a/docs/generated-html/search/variables_f.js b/docs/generated-html/search/variables_f.js new file mode 100644 index 0000000000..834a84b7ce --- /dev/null +++ b/docs/generated-html/search/variables_f.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['thread_5foffset',['thread_offset',['../structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html#a1864c5556529afdc8445021cad780b04',1,'cutlass::gemm::GemmGlobalIteratorAb::thread_offset()'],['../structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html#a56601dc34e8f9a070db5dc48c37d55a0',1,'cutlass::gemm::GemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html#ab3057dad7a4decb5594c66aa328f8066',1,'cutlass::gemm::WmmaGemmGlobalIteratorCd::thread_offset()'],['../structcutlass_1_1TileLoadIterator.html#a7726cdd4fe056c59bb04adb9e5504457',1,'cutlass::TileLoadIterator::thread_offset()'],['../structcutlass_1_1TileStoreIterator.html#a350f5beea87d811f43c55519bc0b9035',1,'cutlass::TileStoreIterator::thread_offset()']]], + ['transformed_5fa',['transformed_a',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a883b28ca237b1ec076856232cfee0c6f',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5fb',['transformed_b',['../structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html#a9369a5f819d2a42997491e0df96f47ef',1,'cutlass::gemm::GemmTraits::SharedLoadStream']]], + ['transformed_5ffragment',['transformed_fragment',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#afa97cb1cfebca0d6977b1c8318bedddf',1,'cutlass::gemm::GlobalLoadStreamBase']]], + ['transformer',['transformer',['../structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html#a868f82ee87aba37b05721fe8210221c9',1,'cutlass::gemm::GlobalLoadStreamBase::transformer()'],['../structcutlass_1_1gemm_1_1SharedLoadStream.html#af846390ad0e5b80ccb4e8b95c5fe64a7',1,'cutlass::gemm::SharedLoadStream::transformer()']]] +]; diff --git a/docs/generated-html/sgemm__traits_8h.html b/docs/generated-html/sgemm__traits_8h.html 
new file mode 100644 index 0000000000..4dd76bfcc8 --- /dev/null +++ b/docs/generated-html/sgemm__traits_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: sgemm_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    sgemm_traits.h File Reference
    +
    + + + + + diff --git a/docs/generated-html/sgemm__traits_8h_source.html b/docs/generated-html/sgemm__traits_8h_source.html new file mode 100644 index 0000000000..965c549700 --- /dev/null +++ b/docs/generated-html/sgemm__traits_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: sgemm_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    sgemm_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/gemm/gemm.h>
    37 
    38 namespace cutlass {
    39 namespace gemm {
    40 
    42 
    43 template <
    45  typename OutputTile_,
    47  typename AccumulatorsPerThread_,
    49  int kScalarsPerLdgA_ = 1,
    51  int kScalarsPerLdgB_ = 1>
    53  : public GemmConfig<
    55  float,
    57  float,
    59  float,
    61  float,
    63  OutputTile_,
    65  ThreadMultiplyAdd<AccumulatorsPerThread_, Shape<1, 4, 8>, float, float, float>,
    67  kScalarsPerLdgA_,
    69  kScalarsPerLdgA_,
    71  4,
    73  kScalarsPerLdgB_,
    75  kScalarsPerLdgB_,
    77  4,
    79  1,
    81  4,
    83  1,
    85  2> {};
    86 
    88 
    89 template <
    91  MatrixLayout::Kind kLayoutA_,
    93  MatrixLayout::Kind kLayoutB_,
    95  typename OutputTile_ = Shape<8, 128, 128>,
    97  typename EpilogueFunctor_ = LinearScaling<float>,
    99  typename AccumulatorsPerThread_ = Shape<8, 8, 8>,
    101  int kScalarsPerLdgA_ = 1,
    103  int kScalarsPerLdgB_ = 1,
    105  typename Index_ = int,
    107  typename GemmConfig_ =
    110  typename GemmEpilogueTraits_ =
    113  // The layout for A.
    114  kLayoutA_,
    115  // The layout for B.
    116  kLayoutB_,
    117  // The config.
    118  GemmConfig_,
    119  // The epilogue.
    120  GemmEpilogue<GemmEpilogueTraits_>,
    121  // The index.
    122  Index_> {};
    123 
    125 
    126 } // namespace gemm
    127 } // namespace cutlass
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Defines structural properties of complete GEMM computation.
    +
    Definition: sgemm_traits.h:52
    +
    Template implementing matrix multiply-add operations on fragments.
    +
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    +
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    +
    Definition: gemm_traits.h:79
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Definition: gemm_epilogue_traits.h:300
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Definition: sgemm_traits.h:112
    +
    Functor to compute linear combination of fragments.
    Definition: linear_scaling.h:40
    +
    Implements a software-pipelined efficient GEMM.
    +
    Defines structural properties of the GEMM epilogue.
    +
    Definition: gemm_traits.h:723
    +
    + + + + diff --git a/docs/generated-html/shape_8h.html b/docs/generated-html/shape_8h.html new file mode 100644 index 0000000000..483edeeb0f --- /dev/null +++ b/docs/generated-html/shape_8h.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: shape.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    shape.h File Reference
    +
    +
    + +

    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects. +More...

    +
    #include <cutlass/cutlass.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::Shape< kD_, kH_, kW_, kC_ >
     A Shape implementing Layout Concept describing the dimensions of a cube. More...
     
    struct  cutlass::ShapeCount< Shape >
     Compute derived counts of a Layout Concept based class. More...
     
    struct  cutlass::ShapeScale< A_, kScale_ >
     
    struct  cutlass::ShapeAdd< A_, B_ >
     
    struct  cutlass::ShapeSub< A_, B_ >
     
    struct  cutlass::ShapeMul< A_, B_ >
     
    struct  cutlass::ShapeDiv< A_, B_ >
     
    struct  cutlass::ShapeMax< A_, B_ >
     
    struct  cutlass::ShapeMin< A_, B_ >
     
    struct  cutlass::ShapeStrides< Shape_ >
     
    struct  cutlass::ComputeOffsetFromShape< Shape_ >
     Compute the offset for the given coordinates in a cube. More...
     
    struct  cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Strides_ >
     Compute the offset for the given coordinates in a cube. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >
     Compute the offset for the given coordinates in a cube with a depth of 1. More...
     
    struct  cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >
     Compute the offset for the given coordinates in a cube with one channel and a depth of 1. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >
     Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >
     Specialization for D=1. More...
     
    struct  cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >
     Specialization for D=1 and C=1. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/shape_8h_source.html b/docs/generated-html/shape_8h_source.html new file mode 100644 index 0000000000..5b980198be --- /dev/null +++ b/docs/generated-html/shape_8h_source.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: shape.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    shape.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/cutlass.h>
    31 
    32 namespace cutlass {
    33 
    35 
    63 template <int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    64 struct Shape {
    66  static int const kD = kD_;
    68  static int const kH = kH_;
    70  static int const kW = kW_;
    72  static int const kC = kC_;
    73 };
    74 
    78 template <typename Shape>
    79 struct ShapeCount {
    81  static int const kWc = Shape::kW * Shape::kC;
    83  static int const kHw = Shape::kH * Shape::kW;
    85  static int const kHwc = Shape::kH * kWc;
    87  static int const kDhw = Shape::kD * kHw;
    89  static int const kDhwc = Shape::kD * kHwc;
    91  static int const kCount = kDhwc;
    92 };
    93 
    95 
    96 template <typename A_, int kScale_>
    97 struct ShapeScale {
    99 };
    100 
    102 
    103 template <typename A_, typename B_>
    104 struct ShapeAdd {
    106 };
    107 
    109 
    110 template <typename A_, typename B_>
    111 struct ShapeSub {
    112  typedef Shape<A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC> Shape;
    113 };
    114 
    116 
    117 template <typename A_, typename B_>
    118 struct ShapeMul {
    120 };
    121 
    123 
    124 template <typename A_, typename B_>
    125 struct ShapeDiv {
    126  typedef Shape<A_::kD / B_::kD, A_::kH / B_::kH, A_::kW / B_::kW, A_::kC / B_::kC> Shape;
    127 };
    128 
    130 
    131 template <typename A_, typename B_>
    132 struct ShapeMax {
    133  typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD),
    134  (A_::kH > B_::kH ? A_::kH : B_::kH),
    135  (A_::kW > B_::kW ? A_::kW : B_::kW),
    136  (A_::kC > B_::kC ? A_::kC : B_::kC)>
    138 };
    139 
    141 
    142 template <typename A_, typename B_>
    143 struct ShapeMin {
    144  typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD),
    145  (A_::kH < B_::kH ? A_::kH : B_::kH),
    146  (A_::kW < B_::kW ? A_::kW : B_::kW),
    147  (A_::kC < B_::kC ? A_::kC : B_::kC)>
    149 };
    150 
    152 
    153 template <typename Shape_>
    154 struct ShapeStrides {
    156 };
    157 
    159 
    164 template <typename Shape_>
    166  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    167  // clang-format off
    168  return d * Shape_::kH * Shape_::kW * Shape_::kC +
    169  h * Shape_::kW * Shape_::kC +
    170  w * Shape_::kC +
    171  c;
    172  // clang-format on
    173  }
    174 };
    175 
    177 
    184 template <int kSh_, int kSw_, int kSc_>
    185 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, kSc_> > {
    186  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    187  return h * kSw_ * kSc_ + w * kSc_ + c;
    188  }
    189 };
    190 
    192 
    198 template <int kSh_, int kSw_>
    199 struct ComputeOffsetFromShape<Shape<1, kSh_, kSw_, 1> > {
    200  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * kSw_ + w; }
    201 };
    202 
    204 
    209 template <typename Strides_>
    211  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    212  return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
    213  }
    214 };
    215 
    217 
    224 template <int S_h_, int S_w_, int S_c_>
    225 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, S_c_> > {
    226  static CUTLASS_DEVICE int get(int d, int h, int w, int c) {
    227  return h * S_h_ + w * S_w_ + c * S_c_;
    228  }
    229 };
    230 
    232 
    238 template <int S_h_, int S_w_>
    239 struct ComputeOffsetFromStrides<Shape<1, S_h_, S_w_, 1> > {
    240  static CUTLASS_DEVICE int get(int d, int h, int w, int c) { return h * S_h_ + w * S_w_; }
    241 };
    242 
    244 
    251 template <typename Threads_, typename Strides_>
    253  static CUTLASS_DEVICE int get() {
    254  // Decompose the thread index.
    255  int c = threadIdx.x % Threads_::kC;
    256  int w = threadIdx.x / Threads_::kC % Threads_::kW;
    257  int h = threadIdx.x / Threads_::kC / Threads_::kW % Threads_::kH;
    258  int d = threadIdx.x / Threads_::kC / Threads_::kW / Threads_::kH;
    259 
    260  // Compute the offset.
    261  return d * Strides_::kD + h * Strides_::kH + w * Strides_::kW + c * Strides_::kC;
    262  }
    263 };
    264 
    266 
    269 template <int T_h_, int T_w_, int T_c_, int S_h_, int S_w_, int S_c_>
    270 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, T_c_>, Shape<1, S_h_, S_w_, S_c_> > {
    271  static CUTLASS_DEVICE int get() {
    272  // Decompose the thread index.
    273  int c = threadIdx.x % T_c_;
    274  int w = threadIdx.x / T_c_ % T_w_;
    275  int h = threadIdx.x / T_c_ / T_w_ % T_h_;
    276 
    277  // Compute the offset.
    278  return h * S_h_ + w * S_w_ + c * S_c_;
    279  }
    280 };
    281 
    283 
    287 template <int T_h_, int T_w_, int S_h_, int S_w_>
    288 struct ComputeThreadOffsetFromStrides<Shape<1, T_h_, T_w_, 1>, Shape<1, S_h_, S_w_, 1> > {
    289  static CUTLASS_DEVICE int get() {
    290  // Decompose the thread index.
    291  int w = threadIdx.x % T_w_;
    292  int h = threadIdx.x / T_w_;
    293 
    294  // Compute the offset.
    295  return h * S_h_ + w * S_w_;
    296  }
    297 };
    298 
    300 
    301 } // namespace cutlass
    Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards compute the offset of those coordinates using Strides_.
    Definition: shape.h:252
    +
    static int const kWc
    The number of elements per row.
    Definition: shape.h:81
    +
    Definition: convert.h:33
    +
    Shape< A_::kD+B_::kD, A_::kH+B_::kH, A_::kW+B_::kW, A_::kC+B_::kC > Shape
    Definition: shape.h:105
    +
    Shape< A_::kD *kScale_, A_::kH *kScale_, A_::kW *kScale_, A_::kC *kScale_ > Shape
    Definition: shape.h:98
    +
    Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
    Definition: shape.h:155
    +
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    +
    Shape< A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC > Shape
    Definition: shape.h:112
    +
    Definition: shape.h:111
    +
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    +
    static int const kC
    The number of scalars per element.
    Definition: shape.h:72
    +
    Definition: shape.h:97
    +
    Compute the offset for the given coordinates in a cube.
    Definition: shape.h:165
    +
    Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
    Definition: shape.h:126
    +
    static int const kDhw
    The number of pixels per cube.
    Definition: shape.h:87
    +
    Definition: shape.h:118
    +
    Definition: shape.h:125
    +
    Compute the offset for the given coordinates in a cube.
    Definition: shape.h:210
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Definition: shape.h:132
    +
    Definition: shape.h:104
    +
    static int const kCount
    The number of elements in the 4D space.
    Definition: shape.h:91
    +
    static int const kDhwc
    The number of elements in the 4D space.
    Definition: shape.h:89
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    Definition: shape.h:143
    +
    static int const kHw
    The number of pixels per image.
    Definition: shape.h:83
    +
    static int const kD
    The depth of the cube.
    Definition: shape.h:66
    +
    Definition: shape.h:154
    +
    Shape<(A_::kD > B_::kD ? A_::kD :B_::kD),(A_::kH > B_::kH ? A_::kH :B_::kH),(A_::kW > B_::kW ? A_::kW :B_::kW),(A_::kC > B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:137
    +
    Basic include for CUTLASS macros.
    +
    Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:148
    +
    Compute derived counts of a Layout Concept based class.
    Definition: shape.h:79
    +
    static int const kHwc
    The number of elements per image.
    Definition: shape.h:85
    +
    + + + + diff --git a/docs/generated-html/splitbar.png b/docs/generated-html/splitbar.png new file mode 100644 index 0000000000000000000000000000000000000000..b9c27ab37f35a89762016540ee15424830b66ecd GIT binary patch literal 310 zcmeAS@N?(olHy`uVBq!ia0vp^Yzz!63>-{AmhX=Jf@Vh3%#5ZW!gSI z*O2g8ern|+p53_xlaB6E^R`})s=b?kai-Yj?;8%zs#KD`wsBX=2ATeZ%I@5^jnAtO z_pkq(|Da;uEL+=sKFfTK<#^=t-oMN5WBBALBtSA^U()|}(e3r`|H_NHUR?d^^s4%* zpljiallIMuozH*IF8=(f?z?rxKjQaa*_ONg_1jIikN)25KDohT66xqydmY2hO}6(x T1~)bU1A@WR)z4*}Q$iB}V~2c( literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1AlignedStruct.html b/docs/generated-html/structcutlass_1_1AlignedStruct.html new file mode 100644 index 0000000000..b7df247dea --- /dev/null +++ b/docs/generated-html/structcutlass_1_1AlignedStruct.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::AlignedStruct< kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::AlignedStruct< kAlignment_ > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    +
    +Inheritance diagram for cutlass::AlignedStruct< kAlignment_ >:
    +
    +
    + + +cutlass::Fragment< Element_, kElements_, kAlignment_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1AlignedStruct.png b/docs/generated-html/structcutlass_1_1AlignedStruct.png new file mode 100644 index 0000000000000000000000000000000000000000..1cdbb00dde697df821117fbdcb1a416640f0b774 GIT binary patch literal 1116 zcmeAS@N?(olHy`uVBq!ia0y~yVDtmB12~w0q@!963y_is@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|!2H_N#WAFU@$KBVMUS-vT=;#b-v2-G z`m~lG0v<&(j-1@PdFj0q4N?|o4C*-*S;9FKBRUu!FwL0$n8jc+GS)n|{A%m%TeXJo zu5J%Gd2z$C(*KpG{~qB;@U!nOp8a-KrPMdEbJg$mOQrKIT7LO+ZTFh3+W+fse6Y%^ z^=7{Oqo**l+Vst29p0(wo%Ju~`Ce|Sc4H`>b!2C*@&A{9Rz%Idw`5gq^Tf~ljQuME zTtoQoir2L|&)d?)+aM|VAg4U=Y39?q8y`OS-@5%#!0Uu*EBE({OC0Lb8td54g}CvZ z{P#*Cui~xtYk%`)+g-!MJs#CRny0*N&a%zlEf?Hx*vzq^@S!ZL*U#51>9TXbPMLnO z^26`Yi;*!Q>DDiDcU0>xJI3~0aYfhe&AVdQ-kDGO`?x$kxW72nUo~|^bt>Q%-KSAD-PHMz<9Qho8|0%?Zc zeb;3dEaq3YK2Y!3Cw#yM8T-#sYitB^@-JEr0)s{tfrWt?fFz%PGSJ$+^?YaM9!L!Y?pDsMQeY;rbg|!9S zj)mTuF8BND&6`&~>U1@~zccsxeNV0VQ@Fp&{yMlt_j$u-?(SRVWqrkq%ci~gz2Y#} ztY=!zr;;@`mh79n#OrwTo{|^p)q$7w3hzZ^I?j;&#g?*6v+VS0Yw^eCA7m=MT4K@qc;qZqNKTxthBszY(`{Jav9|_p7^eeQVE6 zt<~@G^$IyTZ~w}?E~c{Jo2TDo7UzD54LyCoSR&SBrA+AkZ`z+urd3UiowUq!j@zWP z)YQ6{#)cDR&wX;5tfIX(amFE!N&A*n+nJY7JfjlYpIGwkjq1z%%slaJC-pr4XIuJt s8clvYMa3A-uq`|$u~_?<1n(Dq$5}4n`meinfJFg=r>mdKI;Vst08KU*Gynhq literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html new file mode 100644 index 0000000000..c31427b5d7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape_ >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html new file mode 100644 index 0000000000..709f76ee48 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape_ > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<typename Shape_>
    +struct cutlass::ComputeOffsetFromShape< Shape_ >

    + +
    Template Parameters
    + + +
    ALayout Concept where each dimension of the cube specifies the corresponding stride.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Shape_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape_ >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html new file mode 100644 index 0000000000..5d6fc09740 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html new file mode 100644 index 0000000000..406a86ab67 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_011_01_4_01_4.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with one channel and a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int kSh_, int kSw_>
    +struct cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >

    + +
    Template Parameters
    + + + +
    kShElements in the H dimension
    kSwElements in the W dimension
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int kSh_, int kSw_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, 1 > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html new file mode 100644 index 0000000000..9824b8c4aa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html new file mode 100644 index 0000000000..096cdc801a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromShape_3_01Shape_3_011_00_01kSh___00_01kSw___00_01kSc___01_4_01_4.html @@ -0,0 +1,167 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int kSh_, int kSw_, int kSc_>
    +struct cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >

    + +
    Template Parameters
    + + + + +
    kSh Elements in the H dimension
    kSw Elements in the W dimension
    kSc Separation between two elements in "elements"
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int kSh_, int kSw_, int kSc_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromShape< Shape< 1, kSh_, kSw_, kSc_ > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html new file mode 100644 index 0000000000..369de9ff61 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Strides_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Strides_ >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Strides_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html new file mode 100644 index 0000000000..5c3254d1f4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Strides_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Strides_ > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<typename Strides_>
    +struct cutlass::ComputeOffsetFromStrides< Strides_ >

    + +
    Template Parameters
    + + +
    Strides_ A Layout Concept where each dimension of the cube specifies the corresponding stride.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Strides_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Strides_ >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html new file mode 100644 index 0000000000..f37d830776 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html new file mode 100644 index 0000000000..a12a5aafd2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_011_01_4_01_4.html @@ -0,0 +1,166 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with one channel and a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int S_h_, int S_w_>
    +struct cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >

    + +
    Template Parameters
    + + + +
    S_h Stride in the H dimension in scalars
    S_w Stride in the W dimension in scalars
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int S_h_, int S_w_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, 1 > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html new file mode 100644 index 0000000000..2f54eda3db --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >, including all inherited members.

    + + +
    get(int d, int h, int w, int c)cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html new file mode 100644 index 0000000000..be85a7c80b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeOffsetFromStrides_3_01Shape_3_011_00_01S__h___00_01S__w___00_01S__c___01_4_01_4.html @@ -0,0 +1,167 @@ + + + + + + + +Cutlass: cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference
    +
    +
    + +

    Compute the offset for the given coordinates in a cube with a depth of 1. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get (int d, int h, int w, int c)
     
    +

    Detailed Description

    +

    template<int S_h_, int S_w_, int S_c_>
    +struct cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >

    + +
    Template Parameters
    + + + + +
    S_h Stride in the H dimension in scalars
    S_w Stride in the W dimension in scalars
    S_c Stride between two scalars.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int S_h_, int S_w_, int S_c_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeOffsetFromStrides< Shape< 1, S_h_, S_w_, S_c_ > >::get (int d,
    int h,
    int w,
    int c 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html new file mode 100644 index 0000000000..104116b7dc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >, including all inherited members.

    + + +
    get()cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html new file mode 100644 index 0000000000..d434e920fc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ > Struct Template Reference
    +
    +
    + +

    Decompose threadIdx.x into the coordinates of a cube whose dimensions are specified by Threads_. Afterwards, compute the offset of those coordinates using Strides_. + More...

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Detailed Description

    +

    template<typename Threads_, typename Strides_>
    +struct cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >

    + +
    Template Parameters
    + + + +
    Threads_ The dimensions of the cube the threadIdx.x value is mapped onto
    Strides_ The strides to use when computing the offsets based on the coordinates of the cube.
    +
    +
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<typename Threads_ , typename Strides_ >
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Threads_, Strides_ >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html new file mode 100644 index 0000000000..be0dd1975f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_03ed682791cf043da79a7cc93228a8c85.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html new file mode 100644 index 0000000000..55392ee2d8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_011_0e75281d7e02fa191f5d498e10e25dc1b.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > > Struct Template Reference
    +
    +
    + +

    Specialization for D=1 and C=1. +

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int T_h_, int T_w_, int S_h_, int S_w_>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, 1 >, Shape< 1, S_h_, S_w_, 1 > >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html new file mode 100644 index 0000000000..7a2a0294f5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__dd54c41f6edb97d3c208cb7c6fe4ab9b.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Struct Template Reference
    +
    +
    + +

    Specialization for D=1. +

    + +

    #include <shape.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE int get ()
     
    +

    Member Function Documentation

    + +

    ◆ get()

    + +
    +
    +
    +template<int T_h_, int T_w_, int T_c_, int S_h_, int S_w_, int S_c_>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE int cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > >::get ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html new file mode 100644 index 0000000000..80afb2f90c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ComputeThreadOffsetFromStrides_3_01Shape_3_011_00_01T__h___00_01T__w___00_01T__f2e6d84a53db391977c787a65ed62aca.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ComputeThreadOffsetFromStrides< Shape< 1, T_h_, T_w_, T_c_ >, Shape< 1, S_h_, S_w_, S_c_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html new file mode 100644 index 0000000000..12404785c2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html new file mode 100644 index 0000000000..6e00d30bee --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ConstPredicateTileAdapter.html @@ -0,0 +1,231 @@ + + + + + + + +Cutlass: cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference
    +
    +
    + +

    Adapter to enable random access to predicates via logical coordinate within a tile. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Types

    typedef PredicateVector_ PredicateVector
     The vector of predicates. More...
     
    typedef Iterations_ Iterations
     The iterations. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ConstPredicateTileAdapter (PredicateVector const &predicates_)
     Ctor. More...
     
    CUTLASS_DEVICE bool at (int d, int h, int w, int c) const
     Get the value at location (d, h, w, c). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef Iterations_ cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef PredicateVector_ cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateVector
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ConstPredicateTileAdapter()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::ConstPredicateTileAdapter (PredicateVector const & predicates_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::ConstPredicateTileAdapter< PredicateVector_, Iterations_ >::at (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert.html b/docs/generated-html/structcutlass_1_1Convert.html new file mode 100644 index 0000000000..5b2e67b918 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::Convert< InputFragment_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Convert< InputFragment_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html new file mode 100644 index 0000000000..559a16fc54 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ob568b5e19b6f78a5fa50d1f821f0bc2a.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html new file mode 100644 index 0000000000..daf9a756fb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Convert_3_01Fragment_3_01InputScalar___00_01kScalars___01_4_00_01Fragment_3_01Ofca5985d18bcb54bc1f49355f3cee121.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< InputScalar_, kScalars_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< OutputScalar_, kScalars_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Convert ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + +
    typedef Fragment<InputScalar_, kScalars_> cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + +
    typedef Fragment<OutputScalar_, kScalars_> cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Convert()

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::Convert ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform (InputFragment const & src,
    OutputFragmentdst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<typename InputScalar_ , typename OutputScalar_ , int kScalars_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Convert< Fragment< InputScalar_, kScalars_ >, Fragment< OutputScalar_, kScalars_ > >::transform (Fragment_ const & src,
    int offset,
    OutputFragmentdst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Coord-members.html b/docs/generated-html/structcutlass_1_1Coord-members.html new file mode 100644 index 0000000000..a8348bb187 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Coord-members.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Coord< N_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Coord< N_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    at()cutlass::Coord< N_ >inline
    at(int dim)cutlass::Coord< N_ >inline
    at() constcutlass::Coord< N_ >inline
    at(int dim) constcutlass::Coord< N_ >inline
    clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())cutlass::Coord< N_ >inline
    Coord(int value=0)cutlass::Coord< N_ >inline
    Coord(int _idx[])cutlass::Coord< N_ >inline
    count() constcutlass::Coord< N_ >inline
    dot(Coord const &b, T sum) constcutlass::Coord< N_ >inline
    dot(Coord const &b) constcutlass::Coord< N_ >inline
    idxcutlass::Coord< N_ >
    Ncutlass::Coord< N_ >static
    operator!=(Coord< N > const &b) constcutlass::Coord< N_ >inline
    operator*(Coord const &b) constcutlass::Coord< N_ >inline
    operator*=(Coord const &b)cutlass::Coord< N_ >inline
    operator+(Coord const &b) constcutlass::Coord< N_ >inline
    operator+=(Coord const &b)cutlass::Coord< N_ >inline
    operator-(Coord const &b) constcutlass::Coord< N_ >inline
    operator-=(Coord const &b)cutlass::Coord< N_ >inline
    operator/(Coord const &b) constcutlass::Coord< N_ >inline
    operator/=(Coord const &b)cutlass::Coord< N_ >inline
    operator==(Coord< N > const &b) constcutlass::Coord< N_ >inline
    operator[](int dim)cutlass::Coord< N_ >inline
    operator[](int dim) constcutlass::Coord< N_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Coord.html b/docs/generated-html/structcutlass_1_1Coord.html new file mode 100644 index 0000000000..18094b37b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Coord.html @@ -0,0 +1,869 @@ + + + + + + + +Cutlass: cutlass::Coord< N_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Coord< N_ > Struct Template Reference
    +
    +
    + +

    Statically-sized array specifying Coords within a tensor. +

    + +

    #include <coord.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord (int value=0)
     Default ctor initializes uniformly. More...
     
    CUTLASS_HOST_DEVICE Coord (int _idx[])
     Constructs from an array of integers. More...
     
    CUTLASS_HOST_DEVICE Coord operator+ (Coord const &b) const
     Element-wise addition. More...
     
    CUTLASS_HOST_DEVICE Coord operator- (Coord const &b) const
     Element-wise subtraction. More...
     
    CUTLASS_HOST_DEVICE Coord operator* (Coord const &b) const
     Element-wise multiplication. More...
     
    CUTLASS_HOST_DEVICE Coord operator/ (Coord const &b) const
     Element-wise division. More...
     
    CUTLASS_HOST_DEVICE Coordoperator+= (Coord const &b)
     In-place addition. More...
     
    CUTLASS_HOST_DEVICE Coordoperator-= (Coord const &b)
     In-place subtraction. More...
     
    CUTLASS_HOST_DEVICE Coordoperator*= (Coord const &b)
     In-place multiplication. More...
     
    CUTLASS_HOST_DEVICE Coordoperator/= (Coord const &b)
     In-place division. More...
     
    CUTLASS_HOST_DEVICE int & operator[] (int dim)
     Member access operator. More...
     
    CUTLASS_HOST_DEVICE int const & operator[] (int dim) const
     Member access operator. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICEdot (Coord const &b, T sum) const
     Computes the dot product of two Coord instances. More...
     
    template<typename T >
    CUTLASS_HOST_DEVICEdot (Coord const &b) const
     Computes the dot product of two Coord instances. More...
     
    template<int Dim>
    CUTLASS_HOST_DEVICE int & at ()
     Gets the index of a given Coord element. More...
     
    CUTLASS_HOST_DEVICE int & at (int dim)
     Access via index; may limit unrolling potential. More...
     
    template<int Dim>
    CUTLASS_HOST_DEVICE int const & at () const
     Gets the index of a given Coord element. More...
     
    CUTLASS_HOST_DEVICE int const & at (int dim) const
     Access via index; may limit unrolling potential. More...
     
    CUTLASS_HOST_DEVICE bool operator== (Coord< N > const &b) const
     Determines if two Coord<> objects are equal. More...
     
    CUTLASS_HOST_DEVICE bool operator!= (Coord< N > const &b) const
     Not equal. More...
     
    CUTLASS_HOST_DEVICE Coord & clamp (Coord< N > const &max, Coord< N > const &min=Coord< N >())
     Clamps a coordinate to a range specified by maximum and minimum values. More...
     
    CUTLASS_HOST_DEVICE int count () const
     Returns the product of all elements. More...
     
    + + + + +

    +Public Attributes

    int idx [N]
     Indices. More...
     
    + + + +

    +Static Public Attributes

    static int const N = N_
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ Coord() [1/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::Coord< N_ >::Coord (int value = 0)
    +
    +inline
    +
    + +
    +
    + +

    ◆ Coord() [2/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::Coord< N_ >::Coord (int _idx[])
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at() [1/4]

    + +
    +
    +
    +template<int N_>
    +
    +template<int Dim>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::at ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [2/4]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::at (int dim)
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [3/4]

    + +
    +
    +
    +template<int N_>
    +
    +template<int Dim>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::at () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [4/4]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::at (int dim) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ clamp()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::clamp (Coord< N > const & max,
    Coord< N > const & min = Coord<N>() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ count()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::Coord< N_ >::count () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ dot() [1/2]

    + +
    +
    +
    +template<int N_>
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE T cutlass::Coord< N_ >::dot (Coord< N_ > const & b,
    T sum 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ dot() [2/2]

    + +
    +
    +
    +template<int N_>
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE T cutlass::Coord< N_ >::dot (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator!=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::Coord< N_ >::operator!= (Coord< N > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator*()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator* (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator*=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator*= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator+()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator+ (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator+=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator+= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator-()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator- (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator-=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator-= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator/()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord cutlass::Coord< N_ >::operator/ (Coord< N_ > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator/=()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE Coord& cutlass::Coord< N_ >::operator/= (Coord< N_ > const & b)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator==()

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::Coord< N_ >::operator== (Coord< N > const & b) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int& cutlass::Coord< N_ >::operator[] (int dim)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int const& cutlass::Coord< N_ >::operator[] (int dim) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ idx

    + +
    +
    +
    +template<int N_>
    + + + + +
    int cutlass::Coord< N_ >::idx[N]
    +
    + +
    +
    + +

    ◆ N

    + +
    +
    +
    +template<int N_>
    + + + + + +
    + + + + +
    int const cutlass::Coord< N_ >::N = N_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Copy-members.html b/docs/generated-html/structcutlass_1_1Copy-members.html new file mode 100644 index 0000000000..42d212fc59 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Copy-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Copy< Fragment_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Copy< Fragment_ >, including all inherited members.

    + + + + + + +
    Copy()cutlass::Copy< Fragment_ >inline
    InputFragment typedefcutlass::Copy< Fragment_ >
    OutputFragment typedefcutlass::Copy< Fragment_ >
    transform(Fragment_ const &src, Fragment_ &dst)cutlass::Copy< Fragment_ >inline
    transform(InputFragment_ const &src, int offset, Fragment_ &dst)cutlass::Copy< Fragment_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Copy.html b/docs/generated-html/structcutlass_1_1Copy.html new file mode 100644 index 0000000000..65da8b81f5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Copy.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::Copy< Fragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Copy< Fragment_ > Struct Template Reference
    +
    +
    + +

    #include <convert.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment_ InputFragment
     The input fragment. More...
     
    typedef Fragment_ OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Copy ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (Fragment_ const &src, Fragment_ &dst)
     Transform a fragment. More...
     
    template<typename InputFragment_ >
    CUTLASS_DEVICE void transform (InputFragment_ const &src, int offset, Fragment_ &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + +
    typedef Fragment_ cutlass::Copy< Fragment_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + +
    typedef Fragment_ cutlass::Copy< Fragment_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Copy()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::Copy< Fragment_ >::Copy ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Copy< Fragment_ >::transform (Fragment_ const & src,
    Fragment_ & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<typename Fragment_ >
    +
    +template<typename InputFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::Copy< Fragment_ >::transform (InputFragment_ const & src,
    int offset,
    Fragment_ & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent-members.html b/docs/generated-html/structcutlass_1_1Extent-members.html new file mode 100644 index 0000000000..f0a499a6b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< T >, including all inherited members.

    + + +
    kValuecutlass::Extent< T >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent.html b/docs/generated-html/structcutlass_1_1Extent.html new file mode 100644 index 0000000000..0efd6f8398 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< T > Struct Template Reference
    +
    +
    + +

    Returns the extent of a scalar or vector. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = 1
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< T >::kValue = 1
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html new file mode 100644 index 0000000000..f9f4108b0a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< Vector< T, Lanes > > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< Vector< T, Lanes > >, including all inherited members.

    + + +
    kValuecutlass::Extent< Vector< T, Lanes > >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html new file mode 100644 index 0000000000..705991660e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< Vector< T, Lanes > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< Vector< T, Lanes > > Struct Template Reference
    +
    +
    + +

    Returns the number of lanes of a vector if need be. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = Lanes
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< Vector< T, Lanes > >::kValue = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html new file mode 100644 index 0000000000..7c95798ee8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Extent< Vector< T, Lanes > const > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Extent< Vector< T, Lanes > const >, including all inherited members.

    + + +
    kValuecutlass::Extent< Vector< T, Lanes > const >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html new file mode 100644 index 0000000000..6afa4a192b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Extent_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::Extent< Vector< T, Lanes > const > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Extent< Vector< T, Lanes > const > Struct Template Reference
    +
    +
    + +

    Returns the number of lanes of a vector if need be. +

    + +

    #include <vector.h>

    + + + + +

    +Static Public Attributes

    static size_t const kValue = Lanes
     
    +

    Member Data Documentation

    + +

    ◆ kValue

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    size_t const cutlass::Extent< Vector< T, Lanes > const >::kValue = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment-members.html b/docs/generated-html/structcutlass_1_1Fragment-members.html new file mode 100644 index 0000000000..b6b050c04b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Fragment-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Fragment< Element_, kElements_, kAlignment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment.html b/docs/generated-html/structcutlass_1_1Fragment.html new file mode 100644 index 0000000000..640994f862 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Fragment.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: cutlass::Fragment< Element_, kElements_, kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Fragment< Element_, kElements_, kAlignment_ > Struct Template Reference
    +
    +
    + +

    A template defining Fragment Concept. +

    + +

    #include <fragment.h>

    +
    +Inheritance diagram for cutlass::Fragment< Element_, kElements_, kAlignment_ >:
    +
    +
    + + +cutlass::AlignedStruct< kAlignment_ > + +
    + + + + + + + + +

    +Public Types

    typedef Fragment< Element_, kElements_ > This_
     Make sure the alignment makes sense wrt the size of elements. More...
     
    typedef Element_ Element
     The element. More...
     
    + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE void clear ()
     Clear a fragment. More...
     
    CUTLASS_DEVICE Element & operator[] (int i)
     The accessor. More...
     
    CUTLASS_DEVICE Element const & operator[] (int i) const
     The accessor. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElements = kElements_
     The number of elements. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Element

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + +
    typedef Element_ cutlass::Fragment< Element_, kElements_, kAlignment_ >::Element
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + +
    typedef Fragment<Element_, kElements_> cutlass::Fragment< Element_, kElements_, kAlignment_ >::This_
    +
    +

    Alignment must be a power of two. This class.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ clear()

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::Fragment< Element_, kElements_, kAlignment_ >::clear ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Element& cutlass::Fragment< Element_, kElements_, kAlignment_ >::operator[] (int i)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Element const& cutlass::Fragment< Element_, kElements_, kAlignment_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElements

    + +
    +
    +
    +template<typename Element_ , int kElements_, size_t kAlignment_ = 16>
    + + + + + +
    + + + + +
    int const cutlass::Fragment< Element_, kElements_, kAlignment_ >::kElements = kElements_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Fragment.png b/docs/generated-html/structcutlass_1_1Fragment.png new file mode 100644 index 0000000000000000000000000000000000000000..c4bfbfc462b82c7cb0e6821edb83d455e6985bc0 GIT binary patch literal 1117 zcmeAS@N?(olHy`uVBq!ia0y~yVDtmB12~w0q@!963y_is@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|!2HJ3#WAFU@$KB3MQ=0&T=+d({{Nr6 zJX*T@!lm$+AHVuoPRZj8=&}kqsBQUN=9Fc6`-w>+la@T^nlmXS_`wvF*-Xn-Zi-l_ zo>bF5XA(Q9WXaCoNq2VZ@BO#!5;dCBd)55eiJ-b{jT2h z+$5Lv331=O)6T{|{jc|+@=ncmzL?|UJ;AqkC;mQhx@yy>3$>5JEvNLG?Ns#iPdRYY{o<8OJ* ze!H$v+%BSbr{7L%WsHAg9s9Yj5;sME?|y9XVCQ94qQPpKE}-LaGYd1D0cGVybB zius<-+3d)$-!Yy+k)fSQy}>&^{-E%IB?mYkTd$LPW5RDWA&~ZY%4Usr|tcB;^7UGwq2AycQd>5(Pq!D7UI)D zDWXIt`$EsGt>;#3naXwdvCiYZ`^=}duKB{wmu@#ZQZj09b#{>Nyikkk$IISi_vgNr zVc8nro%N8t`uW#thuXDU3+^Y*@i|>mX7N8ED0J4GhcPSWivBjsGH+hDbLE68$t<1b z?+j9J#Xnt2jCpf)W?ARwdrQt0PFwU!XvBl^yXWoKM_t`OI3| zm4#(t)yHSPR6KV#@2dWboA)aB-}t@wh2O^bA1WHxmrs^@JNNESV2XL0@WyMA=C@CN zn}b6(?4G=L-MoF*`0iz&`*dEW&uo_NpI>)BcGd`;d6B|ip|f1vL3#SmrRSUZe->Pm zmTK@Wma?1w+;;x#z%^+O#gdzgQ}{pZ{-VQ`zmfURdh{64pQE`{#7k4P`5$AWs+-(e TjljLYvVg(U)z4*}Q$iB}KfD@B literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html b/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html new file mode 100644 index 0000000000..0de6feda55 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentConstIterator-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    at(int d, int h, int w, int c=0) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    Element typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    Fragment typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    FragmentConstIterator(OtherFragment_ &fragment, int offset=0)cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    FragmentConstIterator(FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    FragmentShape typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    Iterations typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    IterationsStrides typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    kElementsPerAccesscutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >static
    operator[](int i) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    pointercutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    This_ typedefcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >
    valid(int d, int h, int w, int c) constcutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentConstIterator.html b/docs/generated-html/structcutlass_1_1FragmentConstIterator.html new file mode 100644 index 0000000000..6a02e2f1eb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentConstIterator.html @@ -0,0 +1,506 @@ + + + + + + + +Cutlass: cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
     This class. More...
     
    typedef Fragment_ Fragment
     The fragment. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef AccessType_ AccessType
     The access type. More...
     
    typedef Fragment::Element Element
     The element. More...
     
    typedef ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
     The shape of the fragment. More...
     
    typedef ShapeStrides< FragmentShape >::Shape IterationsStrides
     The linear strides for iterations. More...
     
    + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename OtherFragment_ >
    CUTLASS_DEVICE FragmentConstIterator (OtherFragment_ &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE FragmentConstIterator (FragmentIterator< Fragment_, Iterations_, AccessType_ > const &rhs_)
     Create from non-constant FragmentIterator. More...
     
    CUTLASS_DEVICE AccessType const & at (int d, int h, int w, int c=0) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType const & operator[] (int i) const
     The accessor. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + +

    +Public Attributes

    Element const * pointer
     The pointer. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
     The number of elements per access. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef AccessType_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::AccessType
    +
    + +
    +
    + +

    ◆ Element

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Fragment::Element cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Element
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Fragment_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef ShapeMul<Iterations, Shape<1, 1, 1, kElementsPerAccess> >::Shape cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef Iterations_ cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::Iterations
    +
    + +
    +
    + +

    ◆ IterationsStrides

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef ShapeStrides<FragmentShape>::Shape cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::IterationsStrides
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    typedef FragmentIterator<Fragment_, Iterations_, AccessType_> cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::This_
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentConstIterator() [1/2]

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    +
    +template<typename OtherFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentConstIterator (OtherFragment_ & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ FragmentConstIterator() [2/2]

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::FragmentConstIterator (FragmentIterator< Fragment_, Iterations_, AccessType_ > const & rhs_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElementsPerAccess

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + + +
    + + + + +
    int const cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
    +
    +static
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Fragment_ , typename Iterations_ , typename AccessType_ >
    + + + + +
    Element const* cutlass::FragmentConstIterator< Fragment_, Iterations_, AccessType_ >::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentIterator-members.html b/docs/generated-html/structcutlass_1_1FragmentIterator-members.html new file mode 100644 index 0000000000..eba645feb4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentIterator-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    at(int d, int h, int w, int c=0) constcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    at(int d, int h, int w, int c=0)cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    Element typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Fragment typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    FragmentIterator(OtherFragment_ &fragment, int offset=0)cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    FragmentShape typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Iterations typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    kElementsPerAccesscutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >static
    operator[](int i) constcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    operator[](int i)cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    pointercutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    Strides typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    This_ typedefcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >
    valid(int d, int h, int w, int c) constcutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentIterator.html b/docs/generated-html/structcutlass_1_1FragmentIterator.html new file mode 100644 index 0000000000..3bd495f67a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentIterator.html @@ -0,0 +1,562 @@ + + + + + + + +Cutlass: cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ > Struct Template Reference
    +
    +
    + +

    A template defining Fragment Iterator Concept. +

    + +

    #include <fragment.h>

    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef FragmentIterator< Fragment_, Iterations_, AccessType_ > This_
     This class. More...
     
    typedef Fragment_ Fragment
     The fragment. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef AccessType_ AccessType
     The access type. More...
     
    typedef Fragment::Element Element
     The element. More...
     
    typedef ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
     The shape of the fragment. More...
     
    typedef ShapeStrides< FragmentShape >::Shape Strides
     The linear strides for iterations. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename OtherFragment_ >
    CUTLASS_DEVICE FragmentIterator (OtherFragment_ &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE AccessType const & at (int d, int h, int w, int c=0) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType & at (int d, int h, int w, int c=0)
     The accessor. More...
     
    CUTLASS_DEVICE AccessType const & operator[] (int i) const
     The accessor. More...
     
    CUTLASS_DEVICE AccessType & operator[] (int i)
     The accessor. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + +

    +Public Attributes

    Element * pointer
     The pointer. More...
     
    + + + + +

    +Static Public Attributes

    static int const kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
     The number of elements per access. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef AccessType_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::AccessType
    +
    + +
    +
    + +

    ◆ Element

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Fragment::Element cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Element
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Fragment_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef ShapeMul<Iterations, Shape<1, 1, 1, kElementsPerAccess> >::Shape cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef Iterations_ cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Iterations
    +
    + +
    +
    + +

    ◆ Strides

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef ShapeStrides<FragmentShape>::Shape cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::Strides
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    typedef FragmentIterator<Fragment_, Iterations_, AccessType_> cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::This_
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentIterator()

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    +
    +template<typename OtherFragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::FragmentIterator (OtherFragment_ & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at() [1/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ at() [2/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE AccessType& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::at (int d,
    int h,
    int w,
    int c = 0 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType const& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE AccessType& cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::operator[] (int i)
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kElementsPerAccess

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + + +
    + + + + +
    int const cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::kElementsPerAccess = (int)(sizeof(AccessType) / sizeof(Element))
    +
    +static
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Fragment_, typename Iterations_, typename AccessType_>
    + + + + +
    Element* cutlass::FragmentIterator< Fragment_, Iterations_, AccessType_ >::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad.html b/docs/generated-html/structcutlass_1_1FragmentLoad.html new file mode 100644 index 0000000000..634c7f0816 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html new file mode 100644 index 0000000000..13d9dfaccc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_29bcae86cc02cb793583fe6b659e7a83.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html new file mode 100644 index 0000000000..f0d09eca75 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___00_9bf6f8f94e2cd7f3702b853d418a9863.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, kAccessSize >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &value, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef Vectorize<Scalar_, kAccessSize>::Type cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentLoad< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load (AccessType & value,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inline static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html new file mode 100644 index 0000000000..ee593a7a09 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__1ca6d6e2bd7dd222c0b3a77a665e36fe.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html new file mode 100644 index 0000000000..eb1e46372d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentLoad_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar__a157bdca477e8efca5bc9cda0db6db8e.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef FragmentElement_ AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &value, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef FragmentElement_ cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentLoad< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::load (AccessType & value,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inline static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore.html b/docs/generated-html/structcutlass_1_1FragmentStore.html new file mode 100644 index 0000000000..762e813985 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< kIteratorFragment, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html new file mode 100644 index 0000000000..36e1c183ff --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0039852e55b713e99520c56b76ce64b290.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html new file mode 100644 index 0000000000..fbbd941bf6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kScalar_00_01kAccessSize_00_01Scalar___0087787c90510d0c4c07703b5a90c263de.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, kAccessSize >::Type AccessType
     The input type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &value, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef Vectorize<Scalar_, kAccessSize>::Type cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStore< IteratorFragment::kScalar, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store (AccessType const & value,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inline static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html new file mode 100644 index 0000000000..ed4e0fbc8c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_00c2299561c3ffbb17f8afc6add32eba.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Struct Template Reference
    +
    +
    + +

    #include <fragment_load_store.h>

    + + + + + +

    +Public Types

    typedef FragmentElement_ AccessType
     The input type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &value, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + +
    typedef FragmentElement_ cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<int kAccessSize, typename Scalar_ , MemorySpace::Kind Memory_, typename FragmentElement_ , int kStride>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride >::store (AccessType const & value,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inline static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html new file mode 100644 index 0000000000..f2869ed0c3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStore_3_01IteratorFragment_1_1kWmmaMatrix_00_01kAccessSize_00_01Scalar_dea9a5a5c980336e8c43a15909be3cdb.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStore< IteratorFragment::kWmmaMatrix, kAccessSize, Scalar_, Memory_, FragmentElement_, kStride > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream-members.html b/docs/generated-html/structcutlass_1_1FragmentStream-members.html new file mode 100644 index 0000000000..fd69efbde9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream-members.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + +
    commit()cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inline
    Convert typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    convertcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    fetchcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Fragment typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    FragmentStream()cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inline
    FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inline
    Index typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset)cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inline
    load()cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inline
    load_iteratorcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    LoadIterator typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    predicatescutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    shared_store_fence()cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >inlinestatic
    SharedStoreStorage typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Storage typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    store_iteratorcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    StoreFragment typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    StoreIterator typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    Traits typedefcutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream.html b/docs/generated-html/structcutlass_1_1FragmentStream.html new file mode 100644 index 0000000000..8e249098b6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream.html @@ -0,0 +1,598 @@ + + + + + + + +Cutlass: cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ > Struct Template Reference
    +
    +
    + +

    Manages a pair of iterators to stream data from global memory to shared. +

    + +

    #include <fragment_stream.h>

    + + + + + +

    +Classes

    struct  Params
     Parameters passed to initialize the iterator. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     Defines traits of WMMA GEMM tile stream. More...
     
    typedef LoadIterator_ LoadIterator
     Defines the load iterator. More...
     
    typedef StoreIterator_ StoreIterator
     Defines the store iterator. More...
     
    typedef Convert_ Convert
     Converts between tiles. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef LoadIterator::Fragment Fragment
     Loaded fragment type. More...
     
    typedef StoreIterator::Fragment StoreFragment
     Stored fragment type. More...
     
    typedef StoreIterator::Storage Storage
     Destination storage. More...
     
    typedef StoreIterator::Storage SharedStoreStorage
     The storage in shared memory. More...
     
    + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentStream ()
     
    CUTLASS_DEVICE FragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor. More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Attributes

    LoadIterator load_iterator
     Loads fragment from global memory. More...
     
    LoadIterator::PredicateVector predicates
     Predicate vector. More...
     
    StoreIterator store_iterator
     Stores fragment to shared memory. More...
     
    Fragment fetch
     Fragment fetched by load iterator. More...
     
    Convert convert
     Converts between load fragments and store fragments. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Convert

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Convert_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Convert
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator::Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ SharedStoreStorage

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Storage cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::SharedStoreStorage
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Storage cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Storage
    +
    + +
    +
    + +

    ◆ StoreFragment

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::StoreFragment
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef Traits_ cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentStream() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::FragmentStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ FragmentStream() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::FragmentStream (Params const & params,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::initialize_predicates (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::load ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE void cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::shared_store_fence ()
    +
    +inlinestatic
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ convert

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    Convert cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::convert
    +
    + +
    +
    + +

    ◆ fetch

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    Fragment cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::fetch
    +
    + +
    +
    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadIterator cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::load_iterator
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadIterator::PredicateVector cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    StoreIterator cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::store_iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html new file mode 100644 index 0000000000..e629def9c9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html new file mode 100644 index 0000000000..a8708366f0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1FragmentStream_1_1Params.html @@ -0,0 +1,230 @@ + + + + + + + +Cutlass: cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params Struct Reference
    +
    +
    + +

    Parameters passed to initialize the iterator. +

    + +

    #include <fragment_stream.h>

    + + + + + + + + +

    +Public Types

    typedef LoadIterator::Params LoadParams
     Load parameters. More...
     
    typedef StoreIterator::Params StoreParams
     Store parameters. More...
     
    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (LoadParams const &_load_params, StoreParams const &_store_params)
     Initializes parameters. More...
     
    + + + + + + + +

    +Public Attributes

    LoadParams load_params
     Parameters to load iterator. More...
     
    StoreParams store_params
     Parameters to the store iterator. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ LoadParams

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef LoadIterator::Params cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::LoadParams
    +
    + +
    +
    + +

    ◆ StoreParams

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    typedef StoreIterator::Params cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::StoreParams
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::initialize (LoadParams const & _load_params,
    StoreParams const & _store_params 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ load_params

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    LoadParams cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::load_params
    +
    + +
    +
    + +

    ◆ store_params

    + +
    +
    +
    +template<typename Traits_, typename LoadIterator_, typename StoreIterator_, typename Convert_ = FragmentCopy<typename StoreIterator_::Fragment, typename LoadIterator_::Fragment>, typename Index_ = int>
    + + + + +
    StoreParams cutlass::FragmentStream< Traits_, LoadIterator_, StoreIterator_, Convert_, Index_ >::Params::store_params
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1GemmOperand-members.html b/docs/generated-html/structcutlass_1_1GemmOperand-members.html new file mode 100644 index 0000000000..1f0d6f4825 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1GemmOperand-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::GemmOperand Member List
    +
    +
    + +

    This is the complete list of members for cutlass::GemmOperand, including all inherited members.

    + + + + + + +
    kA enum valuecutlass::GemmOperand
    kB enum valuecutlass::GemmOperand
    kC enum valuecutlass::GemmOperand
    kD enum valuecutlass::GemmOperand
    Kind enum namecutlass::GemmOperand
    + + + + diff --git a/docs/generated-html/structcutlass_1_1GemmOperand.html b/docs/generated-html/structcutlass_1_1GemmOperand.html new file mode 100644 index 0000000000..b97ab4795a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1GemmOperand.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::GemmOperand Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::GemmOperand Struct Reference
    +
    +
    + +

    Gemm operand - D = A * B + C. +

    + +

    #include <matrix_traits.h>

    + + + + +

    +Public Types

    enum  Kind { kA, +kB, +kC, +kD + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::GemmOperand::Kind
    +
    + + + + + +
    Enumerator
    kA 
    kB 
    kC 
    kD 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Identity-members.html b/docs/generated-html/structcutlass_1_1Identity-members.html new file mode 100644 index 0000000000..7d06d43203 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Identity-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Identity Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Identity, including all inherited members.

    + + + + +
    Additive enum valuecutlass::Identity
    Kind enum namecutlass::Identity
    Multiplicative enum valuecutlass::Identity
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Identity.html b/docs/generated-html/structcutlass_1_1Identity.html new file mode 100644 index 0000000000..1629a334c7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Identity.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::Identity Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Identity Struct Reference
    +
    +
    + +

    Describes identity elements. +

    + +

    #include <coord.h>

    + + + + +

    +Public Types

    enum  Kind { Additive = 0, +Multiplicative = 1 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::Identity::Kind
    +
    +

    Enumeration describing identity elements. Value assignments are significant. Feel free to add or multiply by these, respectively.

    + + + +
    Enumerator
    Additive 
    Multiplicative 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html b/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html new file mode 100644 index 0000000000..b7e004e469 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorAdvance-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::IteratorAdvance Member List
    +
    +
    + +

    This is the complete list of members for cutlass::IteratorAdvance, including all inherited members.

    + + + + + +
    kD enum valuecutlass::IteratorAdvance
    kH enum valuecutlass::IteratorAdvance
    Kind enum namecutlass::IteratorAdvance
    kW enum valuecutlass::IteratorAdvance
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorAdvance.html b/docs/generated-html/structcutlass_1_1IteratorAdvance.html new file mode 100644 index 0000000000..91a9d3bccb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorAdvance.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::IteratorAdvance Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::IteratorAdvance Struct Reference
    +
    +
    + +

    Specifies dimension in which post-increment accesses advance. +

    + +

    #include <tile_iterator.h>

    + + + + +

    +Public Types

    enum  Kind { kD, +kH, +kW + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    Enumerator
    kD 
    kH 
    kW 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorFragment-members.html b/docs/generated-html/structcutlass_1_1IteratorFragment-members.html new file mode 100644 index 0000000000..2ae9833d59 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorFragment-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::IteratorFragment Member List
    +
    +
    + +

    This is the complete list of members for cutlass::IteratorFragment, including all inherited members.

    + + + + +
    Kind enum namecutlass::IteratorFragment
    kScalar enum valuecutlass::IteratorFragment
    kWmmaMatrix enum valuecutlass::IteratorFragment
    + + + + diff --git a/docs/generated-html/structcutlass_1_1IteratorFragment.html b/docs/generated-html/structcutlass_1_1IteratorFragment.html new file mode 100644 index 0000000000..f02ab2c934 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1IteratorFragment.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::IteratorFragment Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::IteratorFragment Struct Reference
    +
    +
    + +

    Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. +

    + +

    #include <tile_iterator.h>

    + + + + +

    +Public Types

    enum  Kind { kScalar, +kWmmaMatrix + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + +
    Enumerator
    kScalar 
    kWmmaMatrix 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load-members.html b/docs/generated-html/structcutlass_1_1Load-members.html new file mode 100644 index 0000000000..f977a3854f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load.html b/docs/generated-html/structcutlass_1_1Load.html new file mode 100644 index 0000000000..e3640b2547 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, bool, size_t >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 0000000000..98b54d313a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 0000000000..c7036f1d46 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 16 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html new file mode 100644 index 0000000000..5e3d4f237d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html new file mode 100644 index 0000000000..432e4a0308 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 4 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html new file mode 100644 index 0000000000..9b93f91b44 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >
    load(AccessType &dst, Scalar_ const *pointer, int offset)cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html new file mode 100644 index 0000000000..021b3f7c94 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, Scalar_ const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< Scalar_, Lanes_, Memory_, true, 8 >::load (AccessType & dst,
    Scalar_ const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 0000000000..599c7dbe1e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Load< double, 2, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Load< double, 2, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Load< double, 2, Memory_, true, 16 >
    load(AccessType &dst, double const *pointer, int offset)cutlass::Load< double, 2, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 0000000000..7afbc80a80 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Load_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Load< double, 2, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Load< double, 2, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< double, 2 >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void load (AccessType &dst, double const *pointer, int offset)
     The load function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<double, 2>::Type cutlass::Load< double, 2, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ load()

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Load< double, 2, Memory_, true, 16 >::load (AccessType & dst,
    double const * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MatrixLayout-members.html b/docs/generated-html/structcutlass_1_1MatrixLayout-members.html new file mode 100644 index 0000000000..9a6cfd7451 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MatrixLayout-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::MatrixLayout Member List
    +
    +
    + +

    This is the complete list of members for cutlass::MatrixLayout, including all inherited members.

    + + + + +
    kColumnMajor enum valuecutlass::MatrixLayout
    Kind enum namecutlass::MatrixLayout
    kRowMajor enum valuecutlass::MatrixLayout
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MatrixLayout.html b/docs/generated-html/structcutlass_1_1MatrixLayout.html new file mode 100644 index 0000000000..1a79f4d144 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MatrixLayout.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::MatrixLayout Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::MatrixLayout Struct Reference
    +
    +
    + +

    Describes layouts of matrices. +

    + +

    #include <matrix_traits.h>

    + + + + +

    +Public Types

    enum  Kind { kRowMajor, +kColumnMajor + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::MatrixLayout::Kind
    +
    + + + +
    Enumerator
    kRowMajor 
    kColumnMajor 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MemorySpace-members.html b/docs/generated-html/structcutlass_1_1MemorySpace-members.html new file mode 100644 index 0000000000..22af2209a1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MemorySpace-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::MemorySpace Member List
    +
    +
    + +

    This is the complete list of members for cutlass::MemorySpace, including all inherited members.

    + + + + + +
    kGeneric enum valuecutlass::MemorySpace
    kGlobal enum valuecutlass::MemorySpace
    Kind enum namecutlass::MemorySpace
    kShared enum valuecutlass::MemorySpace
    + + + + diff --git a/docs/generated-html/structcutlass_1_1MemorySpace.html b/docs/generated-html/structcutlass_1_1MemorySpace.html new file mode 100644 index 0000000000..410826bf33 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1MemorySpace.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::MemorySpace Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::MemorySpace Struct Reference
    +
    +
    + +

    Enum to specify which memory space data resides in. +

    + +

    #include <load_store.h>

    + + + + +

    +Public Types

    enum  Kind { kGeneric, +kShared, +kGlobal + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ Kind

    + +
    +
    + + + + +
    enum cutlass::MemorySpace::Kind
    +
    + + + + +
    Enumerator
    kGeneric 
    kShared 
    kGlobal 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html new file mode 100644 index 0000000000..f064207c5a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateTileAdapter-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html new file mode 100644 index 0000000000..0dd3d00ad8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateTileAdapter.html @@ -0,0 +1,290 @@ + + + + + + + +Cutlass: cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ > Struct Template Reference
    +
    +
    + +

    Adapter to enable random access to predicates via logical coordinate within a tile. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Types

    typedef PredicateVector_ PredicateVector
     The vector of predicates. More...
     
    typedef Iterations_ Iterations
     The iterations. More...
     
    + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE PredicateTileAdapter (PredicateVector &predicates_)
     Ctor. More...
     
    CUTLASS_DEVICE bool at (int d, int h, int w, int c) const
     Get the value at location (d, h, w, c). More...
     
    CUTLASS_DEVICE void set (int d, int h, int w, int c, bool value)
     Set the value at location (d, h, w, c). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef Iterations_ cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + +
    typedef PredicateVector_ cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateVector
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ PredicateTileAdapter()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::PredicateTileAdapter (PredicateVector & predicates_)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::at (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ set()

    + +
    +
    +
    +template<typename PredicateVector_ , typename Iterations_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::PredicateTileAdapter< PredicateVector_, Iterations_ >::set (int d,
    int h,
    int w,
    int c,
    bool value 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector-members.html b/docs/generated-html/structcutlass_1_1PredicateVector-members.html new file mode 100644 index 0000000000..b4475cc1e5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    at(int idx) constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    begin()cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    const_begin() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    const_end() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    end()cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    fill(bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    is_zero() constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    kBytescutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicatescutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicatesPerBytecutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kPredicateStartcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    kWordCountcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >static
    operator &=(PredicateVector const &predicates)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    operator[](int idx) constcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    operator|=(PredicateVector const &predicates)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    PredicateVector(bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    set(int idx, bool value=true)cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >inline
    Storage typedefcutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector.html b/docs/generated-html/structcutlass_1_1PredicateVector.html new file mode 100644 index 0000000000..43645c578d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector.html @@ -0,0 +1,658 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > Struct Template Reference
    +
    +
    + +

    Statically sized array of bits implementing the Predicate Vector Concept. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + + + + +

    +Classes

    class  ConstIterator
     A const iterator implementing Predicate Iterator Concept enabling sequential read-only access to predicates. More...
     
    class  Iterator
     An iterator implementing Predicate Iterator Concept enabling sequential read and write access to predicates. More...
     
    struct  TrivialIterator
     Iterator that always returns true. More...
     
    + + + + +

    +Public Types

    typedef uint32_t Storage
     Storage type of individual elements. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE PredicateVector (bool value=true)
     Initialize the predicate vector. More...
     
    CUTLASS_HOST_DEVICE void fill (bool value=true)
     Fills all predicates with a given value. More...
     
    CUTLASS_HOST_DEVICE bool operator[] (int idx) const
     Accesses a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE bool at (int idx) const
     Accesses a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE void set (int idx, bool value=true)
     Set a bit within the predicate vector. More...
     
    CUTLASS_HOST_DEVICE PredicateVector & operator&= (PredicateVector const &predicates)
     Computes the intersection of two identical predicate vectors. More...
     
    CUTLASS_HOST_DEVICE PredicateVector & operator|= (PredicateVector const &predicates)
     Computes the union of two identical predicate vectors. More...
     
    CUTLASS_HOST_DEVICE bool is_zero () const
     Returns true if entire predicate array is zero. More...
     
    CUTLASS_DEVICE Iterator begin ()
     Returns an iterator to the start of the bit vector. More...
     
    CUTLASS_DEVICE Iterator end ()
     Returns an iterator to the end of the bit vector. More...
     
    CUTLASS_DEVICE ConstIterator const_begin () const
     Returns a ConstIterator to the start of the bit vector. More...
     
    CUTLASS_DEVICE ConstIterator const_end () const
     Returns a ConstIterator to the end of the bit vector. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kPredicates = kPredicates_
     Number of bits stored by the PredicateVector. More...
     
    static int const kPredicatesPerByte = kPredicatesPerByte_
     Number of bits stored within each byte of the predicate bit vector. More...
     
    static int const kPredicateStart = kPredicateStart_
     First bit within each byte containing predicates. More...
     
    static int const kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte
     Number of bytes needed. More...
     
    static int const kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage)
     Number of storage elements needed. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Storage

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + +
    typedef uint32_t cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::Storage
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ PredicateVector()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::PredicateVector (bool value = true)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::at (int idx) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ begin()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::begin ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ const_begin()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::const_begin () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ const_end()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE ConstIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::const_end () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ end()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Iterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::end ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ fill()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::fill (bool value = true)
    +
    +inline
    +
    + +
    +
    + +

    ◆ is_zero()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::is_zero () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator &=()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE PredicateVector& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator&= (PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > const & predicates)
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator[] (int idx) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator|=()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE PredicateVector& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::operator|= (PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ > const & predicates)
    +
    +inline
    +
    + +
    +
    + +

    ◆ set()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::set (int idx,
    bool value = true 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kBytes

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kBytes = (kPredicates + kPredicatesPerByte - 1) / kPredicatesPerByte
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicates

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicates = kPredicates_
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicatesPerByte

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicatesPerByte = kPredicatesPerByte_
    +
    +static
    +
    + +
    +
    + +

    ◆ kPredicateStart

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kPredicateStart = kPredicateStart_
    +
    +static
    +
    + +
    +
    + +

    ◆ kWordCount

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::kWordCount = (kBytes + sizeof(Storage) - 1) / sizeof(Storage)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html new file mode 100644 index 0000000000..d2f4a9c332 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html new file mode 100644 index 0000000000..4e008feb5b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1PredicateVector_1_1TrivialIterator.html @@ -0,0 +1,287 @@ + + + + + + + +Cutlass: cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator Struct Reference
    +
    +
    + +

    Iterator that always returns true. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE TrivialIterator ()
     Constructor. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator (Iterator const &it)
     Copy constructor. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator (PredicateVector const &_vec)
     Constructs an iterator from a PredicateVector. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator & operator++ ()
     Pre-increment. More...
     
    CUTLASS_HOST_DEVICE TrivialIterator operator++ (int)
     Post-increment. More...
     
    CUTLASS_HOST_DEVICE bool operator* () const
     Dereferences iterator. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ TrivialIterator() [1/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TrivialIterator() [2/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator (Iterator const & it)
    +
    +inline
    +
    + +
    +
    + +

    ◆ TrivialIterator() [3/3]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::TrivialIterator (PredicateVector const & _vec)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator*()

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator* () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator++() [1/2]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE TrivialIterator& cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator++ ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator++() [2/2]

    + +
    +
    +
    +template<int kPredicates_, int kPredicatesPerByte_ = 4, int kPredicateStart_ = 0>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE TrivialIterator cutlass::PredicateVector< kPredicates_, kPredicatesPerByte_, kPredicateStart_ >::TrivialIterator::operator++ (int )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile-members.html b/docs/generated-html/structcutlass_1_1ReshapeTile-members.html new file mode 100644 index 0000000000..03567994b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ReshapeTile< Tile_, kAccessSize_, bool >, including all inherited members.

    + + +
    Tile typedefcutlass::ReshapeTile< Tile_, kAccessSize_, bool >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile.html b/docs/generated-html/structcutlass_1_1ReshapeTile.html new file mode 100644 index 0000000000..936510b309 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, bool > Struct Template Reference
    +
    +
    + +

    #include <reshape_tile.h>

    + + + + +

    +Public Types

    typedef Tile_ Tile
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    + + + + +
    typedef Tile_ cutlass::ReshapeTile< Tile_, kAccessSize_, bool >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html new file mode 100644 index 0000000000..649f85d412 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ReshapeTile< Tile_, kAccessSize_, true >, including all inherited members.

    + + +
    Tile typedefcutlass::ReshapeTile< Tile_, kAccessSize_, true >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html new file mode 100644 index 0000000000..e7acf274de --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ReshapeTile_3_01Tile___00_01kAccessSize___00_01true_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ReshapeTile< Tile_, kAccessSize_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ReshapeTile< Tile_, kAccessSize_, true > Struct Template Reference
    +
    +
    + +

    #include <reshape_tile.h>

    + + + + +

    +Public Types

    typedef Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int kAccessSize_>
    + + + + +
    typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> cutlass::ReshapeTile< Tile_, kAccessSize_, true >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Shape-members.html b/docs/generated-html/structcutlass_1_1Shape-members.html new file mode 100644 index 0000000000..629b6d0de2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Shape-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Shape< kD_, kH_, kW_, kC_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1Shape.html b/docs/generated-html/structcutlass_1_1Shape.html new file mode 100644 index 0000000000..e0d5b53fe5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Shape.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::Shape< kD_, kH_, kW_, kC_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Shape< kD_, kH_, kW_, kC_ > Struct Template Reference
    +
    +
    + +

    A Shape implementing Layout Concept describing the dimensions of a cube. +

    + +

    #include <shape.h>

    + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kD = kD_
     The depth of the cube. More...
     
    static int const kH = kH_
     The height of the cube. More...
     
    static int const kW = kW_
     The width of the cube. More...
     
    static int const kC = kC_
     The number of scalars per element. More...
     
    +

    Member Data Documentation

    + +

    ◆ kC

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kC = kC_
    +
    +static
    +
    + +
    +
    + +

    ◆ kD

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kD = kD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kH

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kH = kH_
    +
    +static
    +
    + +
    +
    + +

    ◆ kW

    + +
    +
    +
    +template<int kD_ = 1, int kH_ = 1, int kW_ = 1, int kC_ = 1>
    + + + + + +
    + + + + +
    int const cutlass::Shape< kD_, kH_, kW_, kC_ >::kW = kW_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeAdd-members.html b/docs/generated-html/structcutlass_1_1ShapeAdd-members.html new file mode 100644 index 0000000000..a7c0d6c423 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeAdd-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeAdd< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeAdd< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeAdd< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeAdd.html b/docs/generated-html/structcutlass_1_1ShapeAdd.html new file mode 100644 index 0000000000..438ecfcffd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeAdd.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeAdd< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeAdd< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD+B_::kD, A_::kH+B_::kH, A_::kW+B_::kW, A_::kC+B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<A_::kD + B_::kD, A_::kH + B_::kH, A_::kW + B_::kW, A_::kC + B_::kC> cutlass::ShapeAdd< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeCount-members.html b/docs/generated-html/structcutlass_1_1ShapeCount-members.html new file mode 100644 index 0000000000..18ff7b21f3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeCount-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeCount< Shape > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeCount.html b/docs/generated-html/structcutlass_1_1ShapeCount.html new file mode 100644 index 0000000000..52f180e35c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeCount.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::ShapeCount< Shape > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeCount< Shape > Struct Template Reference
    +
    +
    + +

    Compute derived counts of a Layout Concept based class. +

    + +

    #include <shape.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kWc = Shape::kW * Shape::kC
     The number of elements per row. More...
     
    static int const kHw = Shape::kH * Shape::kW
     The number of pixels per image. More...
     
    static int const kHwc = Shape::kH * kWc
     The number of elements per image. More...
     
    static int const kDhw = Shape::kD * kHw
     The number of pixels per cube. More...
     
    static int const kDhwc = Shape::kD * kHwc
     The number of elements in the 4D space. More...
     
    static int const kCount = kDhwc
     The number of elements in the 4D space. More...
     
    +

    Member Data Documentation

    + +

    ◆ kCount

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kCount = kDhwc
    +
    +static
    +
    + +
    +
    + +

    ◆ kDhw

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kDhw = Shape::kD * kHw
    +
    +static
    +
    + +
    +
    + +

    ◆ kDhwc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kDhwc = Shape::kD * kHwc
    +
    +static
    +
    + +
    +
    + +

    ◆ kHw

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kHw = Shape::kH * Shape::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kHwc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kHwc = Shape::kH * kWc
    +
    +static
    +
    + +
    +
    + +

    ◆ kWc

    + +
    +
    +
    +template<typename Shape>
    + + + + + +
    + + + + +
    int const cutlass::ShapeCount< Shape >::kWc = Shape::kW * Shape::kC
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeDiv-members.html b/docs/generated-html/structcutlass_1_1ShapeDiv-members.html new file mode 100644 index 0000000000..45aa632954 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeDiv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeDiv< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeDiv< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeDiv< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeDiv.html b/docs/generated-html/structcutlass_1_1ShapeDiv.html new file mode 100644 index 0000000000..46eb5608e7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeDiv.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeDiv< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeDiv< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_, typename B_>
    + + + + +
    typedef Shape<A_::kD / B_::kD, A_::kH / B_::kH, A_::kW / B_::kW, A_::kC / B_::kC> cutlass::ShapeDiv< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMax-members.html b/docs/generated-html/structcutlass_1_1ShapeMax-members.html new file mode 100644 index 0000000000..6c7119c7eb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMax-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMax< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMax< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMax< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMax.html b/docs/generated-html/structcutlass_1_1ShapeMax.html new file mode 100644 index 0000000000..f2f81b900b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMax.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMax< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMax< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape<(A_::kD > B_::kD ? A_::kD :B_::kD),(A_::kH > B_::kH ? A_::kH :B_::kH),(A_::kW > B_::kW ? A_::kW :B_::kW),(A_::kC > B_::kC ? A_::kC :B_::kC)> Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<(A_::kD > B_::kD ? A_::kD : B_::kD), (A_::kH > B_::kH ? A_::kH : B_::kH), (A_::kW > B_::kW ? A_::kW : B_::kW), (A_::kC > B_::kC ? A_::kC : B_::kC)> cutlass::ShapeMax< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMin-members.html b/docs/generated-html/structcutlass_1_1ShapeMin-members.html new file mode 100644 index 0000000000..ce2f15bb97 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMin-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMin< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMin< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMin< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMin.html b/docs/generated-html/structcutlass_1_1ShapeMin.html new file mode 100644 index 0000000000..8cd3b4cf8f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMin.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMin< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMin< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<(A_::kD < B_::kD ? A_::kD : B_::kD), (A_::kH < B_::kH ? A_::kH : B_::kH), (A_::kW < B_::kW ? A_::kW : B_::kW), (A_::kC < B_::kC ? A_::kC : B_::kC)> cutlass::ShapeMin< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMul-members.html b/docs/generated-html/structcutlass_1_1ShapeMul-members.html new file mode 100644 index 0000000000..75cf214caf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMul-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeMul< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeMul< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeMul< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeMul.html b/docs/generated-html/structcutlass_1_1ShapeMul.html new file mode 100644 index 0000000000..93d28b06de --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeMul.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeMul< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeMul< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_, typename B_>
    + + + + +
    typedef Shape<A_::kD * B_::kD, A_::kH * B_::kH, A_::kW * B_::kW, A_::kC * B_::kC> cutlass::ShapeMul< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeScale-members.html b/docs/generated-html/structcutlass_1_1ShapeScale-members.html new file mode 100644 index 0000000000..1ba06a848e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeScale-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeScale< A_, kScale_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeScale< A_, kScale_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeScale< A_, kScale_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeScale.html b/docs/generated-html/structcutlass_1_1ShapeScale.html new file mode 100644 index 0000000000..5056a89d49 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeScale.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeScale< A_, kScale_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeScale< A_, kScale_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD *kScale_, A_::kH *kScale_, A_::kW *kScale_, A_::kC *kScale_ > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , int kScale_>
    + + + + +
    typedef Shape<A_::kD * kScale_, A_::kH * kScale_, A_::kW * kScale_, A_::kC * kScale_> cutlass::ShapeScale< A_, kScale_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeStrides-members.html b/docs/generated-html/structcutlass_1_1ShapeStrides-members.html new file mode 100644 index 0000000000..0ca76c50b6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeStrides-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeStrides< Shape_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeStrides< Shape_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeStrides< Shape_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeStrides.html b/docs/generated-html/structcutlass_1_1ShapeStrides.html new file mode 100644 index 0000000000..4328a35277 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeStrides.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeStrides< Shape_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeStrides< Shape_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< Shape_::kH *Shape_::kW *Shape_::kC, Shape_::kW *Shape_::kC, Shape_::kC, 1 > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename Shape_>
    + + + + +
    typedef Shape<Shape_::kH * Shape_::kW * Shape_::kC, Shape_::kW * Shape_::kC, Shape_::kC, 1> cutlass::ShapeStrides< Shape_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeSub-members.html b/docs/generated-html/structcutlass_1_1ShapeSub-members.html new file mode 100644 index 0000000000..666b9cf09f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeSub-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::ShapeSub< A_, B_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::ShapeSub< A_, B_ >, including all inherited members.

    + + +
    Shape typedefcutlass::ShapeSub< A_, B_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1ShapeSub.html b/docs/generated-html/structcutlass_1_1ShapeSub.html new file mode 100644 index 0000000000..81064ec152 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1ShapeSub.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::ShapeSub< A_, B_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::ShapeSub< A_, B_ > Struct Template Reference
    +
    +
    + +

    #include <shape.h>

    + + + + +

    +Public Types

    typedef Shape< A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC > Shape
     
    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename A_ , typename B_ >
    + + + + +
    typedef Shape<A_::kD - B_::kD, A_::kH - B_::kH, A_::kW - B_::kW, A_::kC - B_::kC> cutlass::ShapeSub< A_, B_ >::Shape
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType-members.html b/docs/generated-html/structcutlass_1_1StorageType-members.html new file mode 100644 index 0000000000..69e4ddfd04 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< kAlignment_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< kAlignment_ >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< kAlignment_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType.html b/docs/generated-html/structcutlass_1_1StorageType.html new file mode 100644 index 0000000000..9205f95cc2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::StorageType< kAlignment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< kAlignment_ > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint64_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<int kAlignment_>
    + + + + +
    typedef uint64_t cutlass::StorageType< kAlignment_ >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html new file mode 100644 index 0000000000..401d5fc0c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 1 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 1 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 1 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html new file mode 100644 index 0000000000..ceab6b0323 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_011_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 1 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 1 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint8_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint8_t cutlass::StorageType< 1 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html new file mode 100644 index 0000000000..ac8127c9b3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 2 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html new file mode 100644 index 0000000000..8464872574 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_012_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 2 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint16_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint16_t cutlass::StorageType< 2 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html new file mode 100644 index 0000000000..bf78873c4f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::StorageType< 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::StorageType< 4 >, including all inherited members.

    + + +
    Type typedefcutlass::StorageType< 4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html new file mode 100644 index 0000000000..74751e5779 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1StorageType_3_014_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::StorageType< 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::StorageType< 4 > Struct Template Reference
    +
    +
    + +

    #include <fragment.h>

    + + + + +

    +Public Types

    typedef uint32_t Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    + + + + +
    typedef uint32_t cutlass::StorageType< 4 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store-members.html b/docs/generated-html/structcutlass_1_1Store-members.html new file mode 100644 index 0000000000..f942adf611 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store.html b/docs/generated-html/structcutlass_1_1Store.html new file mode 100644 index 0000000000..057010d126 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_, bool = (Lanes_ > 1), size_t = (sizeof(Scalar_) * Lanes_)>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, bool, size_t >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 0000000000..c8f01cced0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 0000000000..69ed54867d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 16 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html new file mode 100644 index 0000000000..ad692d4f5b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html new file mode 100644 index 0000000000..443c824a67 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_014_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 4 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html new file mode 100644 index 0000000000..8f54fe4145 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >
    store(AccessType const &src, Scalar_ *pointer, int offset)cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html new file mode 100644 index 0000000000..5f7e301e5e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01Scalar___00_01Lanes___00_01Memory___00_01true_00_018_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< Scalar_, Lanes_ >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, Scalar_ *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<Scalar_, Lanes_>::Type cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<typename Scalar_ , int Lanes_, MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< Scalar_, Lanes_, Memory_, true, 8 >::store (AccessType const & src,
    Scalar_ * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html new file mode 100644 index 0000000000..7d13d6bb1a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Store< double, 2, Memory_, true, 16 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Store< double, 2, Memory_, true, 16 >, including all inherited members.

    + + + +
    AccessType typedefcutlass::Store< double, 2, Memory_, true, 16 >
    store(AccessType const &src, double *pointer, int offset)cutlass::Store< double, 2, Memory_, true, 16 >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html new file mode 100644 index 0000000000..71cc2ab9de --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Store_3_01double_00_012_00_01Memory___00_01true_00_0116_01_4.html @@ -0,0 +1,171 @@ + + + + + + + +Cutlass: cutlass::Store< double, 2, Memory_, true, 16 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Store< double, 2, Memory_, true, 16 > Struct Template Reference
    +
    +
    + +

    #include <load_store.h>

    + + + + + +

    +Public Types

    typedef Vectorize< double, 2 >::Type AccessType
     The output type. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void store (AccessType const &src, double *pointer, int offset)
     The store function. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + +
    typedef Vectorize<double, 2>::Type cutlass::Store< double, 2, Memory_, true, 16 >::AccessType
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ store()

    + +
    +
    +
    +template<MemorySpace::Kind Memory_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::Store< double, 2, Memory_, true, 16 >::store (AccessType const & src,
    double * pointer,
    int offset 
    )
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html new file mode 100644 index 0000000000..a313a5115b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase-members.html @@ -0,0 +1,114 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Delta typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Index typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inlinestatic
    Iterations typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    PredicateVector typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Traits typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase.html b/docs/generated-html/structcutlass_1_1TileIteratorBase.html new file mode 100644 index 0000000000..a946914977 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase.html @@ -0,0 +1,695 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    Iterator for accessing a stripmined tile in memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > +cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters to the iterator. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/stored by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + +

    +Public Member Functions

    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + +

    +Static Public Member Functions

    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix). More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Vectorize<FragmentElement, kAccessSize>::Type cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Delta cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Fragment<FragmentElement, ShapeCount<Iterations>::kCount * kAccessSize> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentConstIterator<Fragment, Iterations, AccessType> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentElement_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentIterator<Fragment, Iterations, AccessType> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentIterator::FragmentShape cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::ImmediateOffsetStrides cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Index_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Iterations cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef PredicateVector<ShapeCount<Iterations>::kCount> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Skew_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Fragment<Scalar, ShapeCount<Tile>::kCount, kFragmentSize> cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Storage
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::ThreadOffset cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits::Tile cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Traits_ cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & offset = make_Coord(0, 0, 0) 
    )
    +
    +inlinestatic
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAccessSize = Tile::kC
    +
    +static
    +
    + +
    +
    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Advance_
    +
    +static
    +
    + +
    +
    + +

    ◆ kFragmentSize

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kFragmentSize
    +
    +static
    +
    +Initial value: +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = IteratorFragment_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = MemorySpace
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase.png b/docs/generated-html/structcutlass_1_1TileIteratorBase.png new file mode 100644 index 0000000000000000000000000000000000000000..ce0eacc9de4519c31a767f8a48a2268ef4061d77 GIT binary patch literal 2958 zcmc&#do+~m8Xsv>8kAh4=H$8wl{743N*pGk#wC)*tz2h@?K*C;-J(72*Ie2($R^`{ z8==UQ(6}`i#$`fM!)%usmpI?hI=i*bI&1Co*ZJ1^zUO_u^?RS^eV^y|{@%D7riT1{ zC-^`h5I@{V9{~dW)CG)h9O4GP2UVPs)iUf&+xXCJ3TH#!u1RhDYgI00r0`{Qhjv1bF zpirkGZt-wNWg{2(@}%_wi05~0Wi$ZS_yKP6e3ekbYiOUP+eBd`%`K=4PbFa!25r(E zo|vVHg1nNkLvQ=%MUl}V(DG+u?Ocbgbicel{9Rzuvq`=O`dU(AzCn#YgaN%x= z+1kR2W&Zu}c&#sogKsytxY7qJQ+55qm=u)Ml#l~yrxeG1r!fCZlv2cyCmmQ4#!8H+ z-QUS#Z8f~jo0*)k?v+{Eoj=XWd>6Uw>s>9+UVozl{w1!~l#5I*mr+%4KA4pC#Pmqt z)z%WK{z1eeXe&5pG4)oSXAR#(kJnP0uv@nMywV}c&7ava*}46~ezcD*kgd>&AypK6 zxHEi6udwv!1(5B2?tv=_yYZ04L09^4(t>8aT}BD9BgK#)7_Sc!s(B*XpV`DE^z!hZ zm!{!ihwxBs-xDA1^5rOZlfQx$qfu8PMqcS;1c^#SKf`ICTKfq>tew7foZCp5N<`bB z=-y9Id;2TIDTv*Hl<$mywX2z%<&%k{6a^wFfii~`)CtjP-#9vN$>of2EJgZLzKKWU zGRyi{L;i)1F?LS~7<^Mj^hHdAJ9EHoD7*Etd#1+rHAfJ?`E-WSR*>~OCrMtQ%Ra93 zL~gdGw&WvzEAn<=3r5*dX9>0JX69HybVn_gmMgD}frpx{P>JHB&y~7`>Vzep)}z_8}BMMT|!{< ztBAerIUWR6)Wy6pkFya@^vmuZ(|T`~Ac>C})V!&h0EP4B#KRgs#Ex}XjRd;cm3W5u zIFzS;8DZD>wBo|LyGr6RVMVcwp5Q5yU??r4;_>O&J7LeU2Rx|W?`m`IdHb(FZMotu z39I7t0r4*AOb6!rI21S!K`uJZf)L_X=PGN*)2}h`O^Syqh7rYLP4^BG2tF;sA@8#s z@!0shZ72M_PPO-a6Bk`XzMcp79Sb0|6(7ag=Xx*(hyGF2%KXp>d5)W}`}k`wv1#Iu zjaes+^%_B2V&3w2Ok2tbZCzETewypVKD%`77S7J@#uy+4l6j;OT17W)mKre2!~^;9 zD>ZMwLo60LR+}<*j3dLzcLkDprTAfDn-HVP|O@ofRff7LUQ=n`hx#Q^efFD`q5-lYQvV z#&K}O%rW|yyiN^M%sDLgVKlnHcmPh5Tcvomj1Os0*)(an(wQB{mG#aF6}!6@EcGuR z*M~Xr4E&4-n^ zEgn9*1n(Ax?#O3Tj2p-*t8GU&-RT9}gKaaUP4&;(m~p77BsvUnskg*6^oFc7JPsRD zl^~8pxE5T-=I3I6lToa_v=w(7P-&wJc|^O~xB4dFQ*i0PhmPHT{jpL<5Y4}CA6 z<0x(FRZ%lt7lYa+sR?RP?V5oi?*2ip!6|r`3tEk;*IlY)7CU3#`eDZ|jPwNm{MFBH z8kL=fC(nGeK^h(jE@ZghTl^&VuD-pf*?5D(yS8OMF+Xthb}-5GY2Rvk+SsT`sN#9J zdlp}V{urg>cK%q`1KE|z$VfcTkqNzB?gCg9)9`L$lH_bOAFAFpRGcwt*oeI_rl(F6 
z9m)${TGUSS%7gVNDb{5T1g)S_%swXzkG;5R`w%g)c+bwVG|Pdw=mwL0E!shuHM31m zzj+TSQgDM+V-l)z9zIrSgzvRPxcfwyj4jbi_Dw;n_pEL zbgmjKy|^tV7UumZV_u<**rFD2?bPr@F0-YYy*3W!{kG0msglK0RIk!k?4|uf$M&xd zLD(PHx65(J%ngRLitmwcf>HUXkk)gLrOFoV%jR?>C!@3x0Ryar<|ir-ayAwHU=?jo z%Um(ENENC%tJwE7*ivGCV*Gx!Orgb2wI;Q6UC`<|gT7a~(t9t`r#0kTUFfjPjnpE+ zloQH8g2}_7bxcBudDKF8sAt!o9;Za3TKX=$*x2dRJWX!NdiMKP&Uz`n?)m5Z;P(#Q zCj7@q$BgA)|Lzr*0?<>cN+4M~hX3IiTWOg3(Dau#C}?ai{Xej(i7jolw?m zkXxe1^T&oVg{V|o5eeENd(o4_5~Pq~2D-rZ(*x;MdwR-hgjAj*TnX!zgeR-U8gUC4 zB9DA4XGwlgQmhU5>5|C2OR5M|6%L7uR)zC!Cr0m7L3Jl828UJPC4rndKU_9us@`y| zUdi|X-71gN-WPI;{Si3ZPokUV1rN$`#sdodX*+f)sd$-dKB@S&xd5TX_Aa@F)Jn2k z^!k~&QH~rTVn_DO`+f7jdm*E2&4i~C`@&~;%riua)c+e@^Rc%Sse&z-+( P0v@=5seXl?{jdK5<93yB literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html new file mode 100644 index 0000000000..f752133c7f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html new file mode 100644 index 0000000000..8bd1a05d3c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1BaseStorage.html @@ -0,0 +1,283 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage Struct Reference
    +
    +
    + +

    Storage object. +

    + +

    #include <tile_iterator.h>

    + + + + + + + + + + + +

    +Public Types

    typedef Scalar Scalar
     Underlying scalar type. More...
     
    typedef ShapeAdd< Tile, Skew >::Shape Allocation
     Shape of allocation. More...
     
    typedef Tile Shape
     Shape of array. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Scalardata ()
     Returns a raw pointer. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns a raw pointer. More...
     
    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Index leading_dim ()
     Returns the leading dimension. More...
     
    + + + + +

    +Public Attributes

    Scalar scalars [Allocation::kD][Allocation::kH][Allocation::kW][Allocation::kC]
     Data storage. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Allocation

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ShapeAdd<Tile, Skew>::Shape cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Allocation
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Scalar
    +
    + +
    +
    + +

    ◆ Shape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Tile cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::Shape
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar* cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ leading_dim()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    static CUTLASS_HOST_DEVICE Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::leading_dim ()
    +
    +inlinestatic
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ scalars

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseStorage::scalars[Allocation::kD][Allocation::kH][Allocation::kW][Allocation::kC]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html new file mode 100644 index 0000000000..3acf4206bc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params-members.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize()cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html new file mode 100644 index 0000000000..be921381e3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.html @@ -0,0 +1,391 @@ + + + + + + + +Cutlass: cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters to the iterator. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params + +
    + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + +

    +Public Attributes

    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Index _stride_d,
    Index _stride_h,
    Index _stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_d

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_d
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ inc_w

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::inc_w
    +
    + +
    +
    + +

    ◆ stride_d

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_d
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_h
    +
    + +
    +
    + +

    ◆ stride_w

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Index cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::stride_w
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png b/docs/generated-html/structcutlass_1_1TileIteratorBase_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..f1c874633c8c272f7da17bc560140267da64b932 GIT binary patch literal 4162 zcmc&%dpJ~U+gG+~6SKRfP_M(5F{m)?N`$E>4HGtYB4?(PoKJ%plA_ra8iz5;p(vGd z&d3ZCiDsO_mZoutaY&dk#0)dTyM|o*eXsA^|9#i>J=e9?dY-W1C&d$bC zR(g-LgoK1F;x236z!0XCs$Wjf$HqS*C&nPsX%S*dh_PZ zxpMSbpt&u~-o`-!FcOzux}$-F1gHyPaoiETX=Y%?Ufs0x=J5XF;v=i(gTIZovLk+l zP4jjqX3Qy{AnDL59xo_J3#A^|6ge2>*c62r=h(pirDd1zirOr_lTWKs%s+M+bu7Ny z``TtH`6`3_Hz`H$B&4kNOG>B9OYYPv%g@*(6>FliMayom^p1z^kQT9L>9@}48eiUb z%Fe;jZ>-6unHPp45Bf`(y$|nAE6!sI>7y~N@=MnZ`8ge}k0SeYpqQ_3;CmF2u2(U$ zsb*hm4K_*9^XN?#N6iKf<(9OdNY{4xRoYfjnd!FlrYJ^Q``4xRI9KQw^X;08tNXyS zY9BjtYcgQmhZrYl{;>ua+imVuPtKA^u+aBfaChi-MO_I-6yr9xJh$kHjxO%c;rig* z_sq6;4@gaan(^uH_+r*Z%FuP z_n=~1YK0v0oz^`u zAct)&H@uY+Y;1m0*dcp1a)9^pbyjndoUkCip=k!qv8>L-2~3Z;S8LyB;GO%)F8>&# zwdpSu2gDXdlBRWt=Xm%&E71ax+>_vLCcrwVvt_-}NBzC)u1a6cHjL!79F{jI?_YS4 zU}-4>0)_j&ME6m2T~@hI1L~G3--zO|KtlEShUCzZfJ~L4BsEiVYc&Nqe>Z-!6kT8t ztX*O4uj0c0a3x~Zb&%uoq0XMy+}Rp26PSu)bo)G1Tnu#!o|lYi4ho!!b-{GDeF$FE zb;5n|9jQiN;X46iwk#UuY;D%hhO<3Yhf7YDJo3=+QUiA?^DQM4Bul|bNYhlD+VRLC z(4z-E&cth*$DD~Z3QK%!D(;!zc1|R|G7U%L*S*$5c@On{JmaU(-e~wle~H@KRANMB zoH#}Nq~T3sy&E4|oNcC9{(J=5Mmv~HGlH-i{ywpoku7P|)z{ku_K?gj8`ZTZLI zEA$G7e1;|#-%(O=C_ju9RPK6JDfbT)RUI5NWlL*faA*7&1*&{(0nZeV!+txfav_bh z$_^eeQwd{D9wgABf>f+;y0kWd*TZ6|UZ-94{p>^d3x}8 z%ii4DxtaF;Zv}m03*OEK@Tz8>0dDfCI6^eC@YZ|YlIE@eVekX1Q#m9M@Ju&y%r5=Ylpw8mH{UnBtJwpOb0P43qxo+$TaDlid@pmAA#P4MM zEVhfk?)3xN@an1f1F`>MsCeqfcX%oU)uJK_aL@2(RLQDnTjza*0YLn#&yXg%w@LQBjhDP2 zwBLkIH}&5)xuwSrqYy$oG1~A`Q7=2gLrC~q{f=orXbe| z`0Y5hZA)|(X!+8`K=!gPH>{*qEDQ*PU^nWgmP>FNEb&H1?D%`jqlo6bEw95!oPi-}KW{DZbu>5u?Iz$md?KDh1!prwe}4EB12MooZYW89@&eA$>}RC;sScJ7aM7B51)OUW2xw73B}z zvG6}b+^Ly3_`ci2O}2=LDwvDnP(Ez%9`5!_>upK*jeWld74ni2*dA?etbWaaNV@r< zwo28$w`D`*)doNOaxVsa 
zwyP9h_B4?vw-5@~A?gAPs9h&qZJar=^26|DZa7exo0(A`-* z5smir)JU$xEB(Pc7kZnK(s4nx-KCeXaM5B7UI_XW)XZHTl-4ai=!C{B z)W|+hvgymwNwh$f*3{5{zPIcX{fZ>V@vYhSqUXbVTLKXS>eN6bZo!o!g#p7}8O>9Z zs_IcelTW_urG8$%W-mBeYUgv^#*Ru zh&(L;;>LpwR(Nr*a&t^T+Itj|>q|l-oU>2BUAwp@i@h)o%H~t*irbAo0>L3hNrYuL z4m%_*KYOF1%<*sT!r;B8KaZn-W)mhe`;!c~EvR4_%)RI0DV^Wh{3{qSCmWS|2NKm* zB(Jf%Iq3{iX3$ln-@|ENxn2j|-R!g*L+>D1=0QNt?=A2cYZpbt+!h_c{?EYu;ES)~ zGU_2w$+kLH26t@lXDLu#35|MerRD@<1h+xB?cjVowv7=Uffw#ry@nLWmxsIJ&{-&c z^l16f=cToe5OeY#-6mI^*{$E@OQ5&+9P5{KI!k@Y50f=$>`YGTca3UR$ z?+rx)~jxTY=(z`B8Yda#pCRDVPBJsZgTn}8-cMlTRG})(#Pzfal=N;;Vx0F{0 zW1?d#(6KAXCncS^0}Eak@RZIj1YTJA!x@cC$wv2dgf|o3HxRrzWLx}tj z(~zZ)Vyh(u)JWL!KzoO9MYKG%z$k_DI8&p;qAAW~OFh9XJMzB%ZZevTJYpFtGq%t* z)4>0(3-&3LF|9pNi>98Zkgf2_gwu=n<7~GCa6nuT!^Ieh53~gGiS#%-XuugGBU$w# z6Gb59TQL&V=;U239hW_Wt|A9vC#%Vkq@pBk2Z6Hh$P-&0Vt*i_S^s9l9q(?Ymvx%S z45gi6g%7k{bL9wyzN&%OysG0JL_L-R)y{q>2Q|XTgFiPG?(gYw08-N?kgs=O@IVD< zhwcFKMDWT|EmhyIwN8TuKPL8&w--!Mi3|wynJ2dyQ{ft9{cMciAh!9Dpu@W4p=%81 z1T;~*2+LDJjeLH+m6)Ydvb^{^^vNh>cMC+;u*jboQfY+gtn`GU0q&WH@XEn|dODhMLBSw6u$~-I2c~W{D8C{UWCg6sCd%*SK0v-u zhSEp{Yr&uerq(iFkJVMzi`mcGiGuMEuteC9Xjs6MY6m$+5$aYaiq#JrYh?`&2!Z1Y zEiilE6yEhKIDy2BNWsQ_cy*t66N!{z)1!yJQb=`W5E@DRUDByq0Ez8bA%4t+q%5g1 zk$Am)akni2H&i!2#Lg?4pP;$9U(}?fJlW{S>WisF9MeHxEZ50HyholV*EmLq7N@Nn zITyeZGHVIfXo~}|6B}d%WUuGS{$`ZUEdW1EbgphXF4=F-`u?s8_~Dm8oV2khIpKNz E->OpV*#H0l literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html b/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html new file mode 100644 index 0000000000..6acaea33b6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator-members.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Base typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    BaseParams typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    data() constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Delta typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    inc_advance()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_d()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_h()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_stage()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_w()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Index typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Iterations typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kRequiresLoadFence enum valuecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    load(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load(Fragment &fragment) constcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    load_post_increment(Fragment &fragment)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    paramscutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Pointer typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    PredicateVector typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    SharedStorage typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    stagecutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    thread_offsetcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    TileLoadIterator()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Traits typedefcutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator.html b/docs/generated-html/structcutlass_1_1TileLoadIterator.html new file mode 100644 index 0000000000..d670b93fbb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator.html @@ -0,0 +1,1253 @@ + + + + + + + +Cutlass: cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    An iterator implementing Tile Load Iterator Concept for loading a tile from memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    enum  { kRequiresLoadFence = Tile::kD == 1 }
     Do we require a fence? More...
     
    typedef TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object that may be loaded from. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    typedef Scalar const * Pointer
     The pointer type. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename PredicateIterator >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Loads a fragment and advances the iterator to the next tile. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment)
     Loads a fragment and advances the iterator to the next tile. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void load (Fragment &fragment, PredicateIterator pred_it) const
     Loads a fragment without advancing the iterator. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void load (Fragment &fragment) const
     Loads a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + +

    +Public Attributes

    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     Stage argument enables wrapping after some number of tiles have been loaded. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::AccessType cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileIteratorBase<Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_> cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Params cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Delta cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Fragment cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentConstIterator cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentElement cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentIterator cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentShape cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Index cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Iterations cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Pointer
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::PredicateVector cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Scalar cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Storage cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Skew cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::ThreadOffset cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Tile cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Traits cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kRequiresLoadFence 
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ TileLoadIterator() [1/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileLoadIterator() [2/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator (Params const & _params,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileLoadIterator() [3/3]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileLoadIterator (Params const & ,
    SharedStorage & shared_storage, 
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0,                                                                                           0,                                                                                           0) 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load (Fragmentfragment,
    PredicateIterator pred_it 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ load() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load (Fragmentfragment) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ load_post_increment() [1/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load_post_increment (Fragmentfragment,
    PredicateIterator pred_it 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load_post_increment() [2/2]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::load_post_increment (Fragmentfragment)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = Base::kIteratorFragment
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = Base::kMemorySpace
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Params cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::params
    +
    + +
    +
    + +

    ◆ stage

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::stage
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Coord<4> cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator.png b/docs/generated-html/structcutlass_1_1TileLoadIterator.png new file mode 100644 index 0000000000000000000000000000000000000000..30866fa85d7157dba4240da683495a780e57590b GIT binary patch literal 2304 zcmchZdo+~m9>>RhB4paFND>X!c3V>Jmr;lzxkoNFN5ZHXOc)ur_70I6-XUZ%?VU!F zT#LbGl1nZnh0+YeHcT!vh@p%b=dHb4d!0YdI{%$_t>=Azzi0h!>sim|yWT91Q%>@- z`(z;yh&i z0S)`5*%PAcaK{g~5N9t&yd@u?0Lu*JzDn~KEDiF%gCbcDJW#y4rsTSrGDkGZC5rdIe1Wg(QSCRB7`-CtwUpp}Ou#0?T>a+wd__iX znRLPZ(cf+!NzhLxzsE3dS`q=WKW=Y*rLAc6*2%mD?GAOJR~ZboY&USoL<&J#QBgZv ztu08&iPN&U@_jyB2rMqW>F#Vgle*bcJ&E8C16IZsz$M?lxiga?l z1Ed1g*7lagN+v)KYK;ryde7BCgBz8CWRs6+ojF|1KufErRLh6+Vt>>I?9tUK&oyXn z6g8Tf+5E8!ZsxS}xSl=c2ZFPf$oQRWF;T@BYIuWL<`P3|cFmg8Z7Yt;Y+SEyRQ$<7 zxh}QlcwfWL0U6>PaVQ$#J2q)-a550clfXW5f9-j6d2rulcixezI%Eiru^D7Wi+5wr zzW(~J(W9?&8H9cB7(O*vLsA}G$8_9M;c-k=&Xs)3%ZIz(YRlA@22W=y{o)%U(CE#M zW3leNGAd$X);*QPaf2pX=n!Cdip))Z0+7j4&038rX53}0Hr}Ty!u3l3{4+0OIG%BH z=roUtG79M6?B+G6fcHhhm51soQk5xeCq~~l)3rNigu%g{a6H$a_vdJDIJ}@}Yxg1z z$bpGHM7Z7xyX~xJyMD9T+gt&54~x`7IAQWKjY+|>2o%l6Gk0ws1Z4w{l0JR7&b*JY zCNtw|XnVHvm_vpt0D<&)ZAte>#Vk(Xr#1>Po!Rar{K~zagmr6?I~mdQ;PZ^XQM{B8 zn}IOsZ;Y{xEabO;(}waB-z2&MpHnZ4=;bc` zDHk7CA2q_SHT-gHo%%GHPRnRVUK*bGeW;}nPISQ{H?KC)O5j}y7vQ}w&+mObIl5WA z;Qo;n?T0+k)<2a`N!_zp4hWPjI86m*Dy_yqDEp~gRCKs4Cs8*~k>mzYSLC}&+!xQ( zxCR0wbp%aoPUc?4RwL0HDZA@w5Fo;D6?6hjAV0TkCKn{*8r&cg-UyP4wt&;LKwSGZ z{>Jqjd*UCO_lnzzPgYOvwG%%mRs#c=kR53w%L{&Ws-UaLH zg?ui=AA~+lk`6L20f&Ide+%FLJsfN#Q1Sg34$`}pex)9xmki?!!`bx8b3`3YMaKV!!yCD8Dx$V3i* zT2$f`nVnd`x)7W2$DJgVo-w~*dhaFgb6V2ohqIcGGf%C;7(VSeAJ-9K(yBN?8$Trg z{~j4VPw}vU!o;GR7Jnu9Z>K4FtE=~fV}+2@h7*?c>lN1;7pF~>l2)2s#ZC__@cLMx z?7$F<%jI(B#hCfZPRM>2!YimkFBcl>JlEu@sC%)aR}E;XC~A)hx|?9N_HjD%BlGA< zS)-6&35nyuixRZ^CB7Hy4Lbp-r`DhFMRHG{jl`21GeKQ$!OU>1a>s3m?QP2f$ zFjC8K&rlauq1=Dn!!MJH&S0Jugei{5j(*YBo$RFg^c*W}J*0^0)c14BFJ*?=$oR@? 
zN>+BROVosn_srm{=5_ET$v)RWow&1UcsNZVT@&*pZMWI14BK>?sy6Y8eQW3&D~jb?hK?%A8Vv|+36v6Ke( zI(|j!$V`-RtL>;1;$_>sv{?C_P2E~g!i^J42k3{2<{=1~>bKh&R*3kio!BZ0O;jf2 z3WbK5-YFvA8j1_=JllFeRG@_V4@r_o?PGbge(fxkYH1MDL8Ra$wjf7+MS+gQ0EOlD t>1z1x|Dgn5G&lZY)SwpRb(cuY3VnMA`q0zE;O`0q>3GWF$#K*#{|0n|T-X2r literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html new file mode 100644 index 0000000000..1977795eae --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(SharedStorage const &storage)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    initialize()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    pointercutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html new file mode 100644 index 0000000000..b25879f36b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.html @@ -0,0 +1,350 @@ + + + + + + + +Cutlass: cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (SharedStorage const &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Scalar const * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (SharedStorage const & storage)
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar const * ptr,
    Index stride_d,
    Index stride_h,
    Index stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar const * ptr,
    Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [4/4]

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Traits_, typename Scalar_, IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar const* cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png b/docs/generated-html/structcutlass_1_1TileLoadIterator_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..9993389178ac171e0b46840981fa7181857144c3 GIT binary patch literal 3293 zcmd6qc{o&k8^=ea)D&eKI~hu7Oq52JJd{RuQV7{e24l_GdT0u7Ga4!pdTQ)j#0j3c_s{ow&vl)1e&=`H_kHf)IludJeeW0(BfS&HgpUCL zfD`bWx@G_XJCHSgc9flU4Re1h%({V03@oni?d`FKi@#4>GXI?Q{ii8m!uEYYIf17W!OVJ;nu-r(&czlW|>B4+X;O3W~AH1}yz_Zcm zKJzK!^T!2-LKaCFIL?XayYgaQY$p-)S+ra<N_p_YeTN<5PwZJjZwyR03Ek$|LhKl|2pv)iD^ORb`zz=(; zP>cS&jF4#XrQWv$P6mwK_ocHYVgk`?mG~3QuQL$X*fKgOWA9uE8xIQOGK1Y;th>=G zJ4R9-`Oq=9^krA0*;A&%msY=^Cfj;qBKvq_6ym!~%B@UfpgiY_HC3w~63`zjHCSSs zYis8ElzK0d98-4+juf>Lx(ji;g==W+Nq2JH#)DNc>OoX%j{v*qOe*i&<6+li1{`u9 z+Xw|a=U;{e5-f2dMDguHC*03-U7q@Z=g`SV!b)I*?E|F4XE|Tr3V82oMGB;|AuI6^7G;#m^a5Tm%x4MM>qXK7Av z4}~Ls!-MeWdbpvF^ZiICRq~gla)REC$MFKN2TxXGE~-np-(J%I4>)wJtGa6d)pTfK ze9_l{iNLC@CVPAMQynu*>MBj(?6C4KTooT=FT* zK}4|Ld{rmHzt?ij#R&LRpE=d%@$pncwy>sv&RNo$?2A3_#&7(!@{URcRmR%W@9}El zRf{))UNi4`t6G8Rs7#51@OoRBH%sTEUel|FP;lX;LPv`J zpk1fT?6CvG9@%a5CkENT9{CfG)bCcbuoagiG#lB*%SLTmV-cD=cfC z6_qvLbKuIt$-blUfnypU`?W(#8giWfGl#vmE>^FctzOAky>fXYBoN8LO8@nL%0lSb zIBHoJ{B>>O&Iz)2-jA*c9gOk?R9}H6akMD7@ccgFPF8aqow>lR1b!x1$E7tsq^dC_ zs8GI@Z4Bcaqj(L4;!%29GH9hEb_!@y1HhSZEq{B6I?`$x4WHJbczn!(0HRXS za(XP=n81>QV9Dt^U-=PPq1@$~X+lYD7X!i=p{h+^vRM;N3BTj@%PpPiiQAFKO{TfJ z1YBbOX5u%P<_&@J+<{L`Tz2*Ids*(-7tuzL*8KHz7-?AZ#ozCc@vpzJ)M!&K=6l6 zLj=2>KoPxabG+R5muWf@SZA6>PNbUvFL4_b-mpvlbZb5t99Us-_Q9(ssZxA8ybcSd zq8;VT)x4)q>Mmu!Fpm^k^GV0J38Ey?Gp+$dk2>xu(Oj2r@AZF)ptZ6ssD<*LY^sHt zbLJQ}6?zBpYd4~-Agg6S8Zx}=2c>7~yTvt`{Fv?j8hDZTakc3JqC=Xsm_8?160Luw zBPUX%Kf@ZKHce&GjqvzrlZm`DU&dJUyW0cn0Q$$NiA7S@+||F~h86z>YXPX~$ClO{ z`ZsKImZSp>VKo$?L2_KWY<7wE?6FCKwUHWSfXZDchhqLn`|kQ6*IC*Pwi)*3KSdmv z5|1R#!RCO|zrXx%t^uw-beq9qklhqdc0QFNqp0Vg&rD!@YK&Al{O2A!)W2yV7GxkV z!4={^-+CO|`0dGsD^katW^Dl&?au4f56+PmN!zF-c{GMZjb`q2cOD7=_08vTD?6M_ieg}Xv`XH{xGfB6 
zclBPMiWh*kZA0q}Hbl6wSZ3by>LSl5c8CV+4?IsJ=FfDn zyQ~*L; zcrbd@hp>}8dkgc{!YIl_Ofozm?h+x5G=8Kx2ur*Z&Ran+69`ZTHzEQxDfnm!CUd8@?h<_}+z3 zuHRmpFc9;*<2_uEh;Lbadb2xgU-Tu}^JWEN_#vknyb0Wv5(#TgrHhk}>YF{5!@ZDQFyz?-XrrU?`SxqzV zAYP;@`odisO)TyTUeAJ;G*tAX#&h+ly_~yDcX&5x1@7pRoXtzXq@6$8T_;I4unU}B zz#_Uwa#}hr;6w{r*rBBY6OD9rAl4U25xKXixVq<;xH#)bu}3Z})^s8mBNksxfYY== z;W=cxH|!qlq5@Bp)v}F0iT23c5;)?Rm|6|G=K~%X=PMHtw%iC7H0K z(xY+V^62lC#h^c3f8=^XID-a1bbh;+%Xq4dM2lDMHKqQwPQos=s5~`3-Czo1Zqs|* z?K7^|f4po=e#4(Jb`pCsL5@equvk`3B!;_@CnJbvzS#jfhW9Z9EPRnoOOS`qKT0CX3)Aikh-K}BDpe4TQ@q3b{W)J_Qb8C)!Z~tJTLAOn z+EA{rU$Y!R&WKfIHZ1i5Ad&TBYpozJb=b%^g#CnMXOJ Tpo7(m0N~e+bPNBu|K~pemf;i) literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html b/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html new file mode 100644 index 0000000000..f24d2dcd78 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator-members.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Base typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    BaseParams typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    data() constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Delta typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Fragment typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentConstIterator typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentElement typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentIterator typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    FragmentShape typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    inc_advance()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_d()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_h()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_stage()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    inc_w()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Index typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Iterations typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    kAccessSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kAdvancecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kFragmentSizecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kIteratorFragmentcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    kMemorySpacecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >static
    paramscutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    PredicateVector typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Scalar typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    SharedStorage typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Skew typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    stagecutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Storage typedefcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    store(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store(Fragment &fragment) constcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    store_post_increment(Fragment &fragment)cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    thread_offsetcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    ThreadOffset typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    Tile typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    TileStoreIterator()cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    Traits typedefcutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    valid(int d, int h, int w, int c) constcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator.html b/docs/generated-html/structcutlass_1_1TileStoreIterator.html new file mode 100644 index 0000000000..4fe6f216b9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator.html @@ -0,0 +1,1210 @@ + + + + + + + +Cutlass: cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Struct Template Reference
    +
    +
    + +

    An iterator implementing Tile Store Iterator Concept for storing a tile to memory. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object which may be stored to. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    typedef Traits_ Traits
     concept TileTraits More...
     
    typedef Scalar_ Scalar
     Scalar element. More...
     
    typedef FragmentElement_ FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Skew_ Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    template<typename PredicateIterator >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile store iterator. More...
     
    CUTLASS_HOST_DEVICE TileStoreIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile store iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void store_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Stores a fragment and advances to the next tile. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void store_post_increment (Fragment &fragment)
     Stores a fragment and advances to the next tile. More...
     
    template<typename Fragment , typename PredicateIterator >
    CUTLASS_HOST_DEVICE void store (Fragment &fragment, PredicateIterator pred_it) const
     Stores a fragment without advancing the iterator. More...
     
    template<typename Fragment >
    CUTLASS_HOST_DEVICE void store (Fragment &fragment) const
     Stores a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + +

    +Public Attributes

    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     The stage. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = Base::kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = Base::kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    static IteratorAdvance::Kind const kAdvance = Advance_
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize = Tile::kC
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
    template<typename PredicateIterator >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ AccessType

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::AccessType cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::AccessType
    +
    + +
    +
    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileIteratorBase<Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_> cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Params cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Delta cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Delta
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Fragment cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentConstIterator

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentConstIterator cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentConstIterator
    +
    + +
    +
    + +

    ◆ FragmentElement

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentElement cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentElement
    +
    + +
    +
    + +

    ◆ FragmentIterator

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentIterator cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentIterator
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::FragmentShape cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Index cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Iterations cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Iterations
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::PredicateVector cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Scalar cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Storage cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ Skew

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Skew cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Skew
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::ThreadOffset cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Tile cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Tile
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Base::Traits cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ TileStoreIterator() [1/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileStoreIterator() [2/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator (Params const & _params,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ TileStoreIterator() [3/3]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::TileStoreIterator (Params const & ,
    SharedStorage & shared_storage,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0),
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar* cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::initialize_predicates (PredicateIterator predicate_it,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ store() [1/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store (Fragment & fragment,
    PredicateIterator pred_it 
    ) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ store() [2/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store (Fragment & fragment) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ store_post_increment() [1/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment , typename PredicateIterator >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store_post_increment (Fragment & fragment,
    PredicateIterator pred_it 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ store_post_increment() [2/2]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    +
    +template<typename Fragment >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE void cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::store_post_increment (Fragment & fragment)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kIteratorFragment

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    IteratorFragment::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kIteratorFragment = Base::kIteratorFragment
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::kMemorySpace = Base::kMemorySpace
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Params cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::params
    +
    + +
    +
    + +

    ◆ stage

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::stage
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Coord<4> cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator.png b/docs/generated-html/structcutlass_1_1TileStoreIterator.png new file mode 100644 index 0000000000000000000000000000000000000000..a20f18cfe5132b95f7238f8db747c862f04b5455 GIT binary patch literal 2309 zcmchZX;2eL8itc_CxQYh6S)>lI0O_CQRI?CB%BJ7W94?D2qEDRB9Ti#5oa`PgeU>z z2qGDfLqX&S5TXde1W1tEghOsHC=nRKF_E3xs{OIM|F)~T`|Gant9rYu-={k5g7bN4 z@F6e&0FXx5*|-7#AdWD;ASogoq1dEF;q=o5N94JkogHDg{C&2#H7Z>g?e>7s#FiQMr&ZVn%vV3heikqFNXncHIg_gKMiHiimiGRJujOwX3F|0MN^y6Gh-U=}>Gx3J<9&iGw zZ_+qa#hK@4y7UPywI4fo)FO$#8Sv*`Xk>nj)S?e^kPCTJF0C zGgdN%l;t(SJLt%E!Ew#)3&cmsbk-gCzs}0E*IahBbZg$B3D$sCRsopdT`gzs5p-mymziBZwU1`JLhl zzEmsHPvg*Mn$aO`M+>{7*V&O`9&NXzUy)=K0z4jgm4LOTD!4g$+blAAlmtuVS_ogiLjG@CwFD#rywAvoxP3BSqw>Ex(hl$7YDLs| zD?uBG@{wPtFHei|8K4R=u@2n?+5upIX)BiE-+=n6+nb1Z5g4G@`kzbl!Q4%8J3!an zqElx<{`Rmspd3KxcIkA$6QS7?MO#wr@WONro85FZA_pLqDD@A%u*C7+Zi$c(|JRb` zNKX<9q1*WqMnZW_gCJ)?!c)orM_q;1JOTqUd~76dzJK7NOGQW}Z+h84$sRIcnw1f2r}#txgpyJ8T%KiC{A$)eoAD(n*pH4Lfjy`BEbi~nZcpy0@am(09*0l zXx|tF&ndJrAv^;BRADKO$vS!nc_m_MwTMo9oS4ZP{bpp&eBIlax0D;X^@=%KI&4cX zuOg(fuL=r`#qKY@mFfZ@wRFuU%;e{XYM-MNhjPlOlM4RtSOTQU5u_=P1j$urXQoyN zu6=ir$khydO4XiM`IQj#3Sa%lg@=@5IH2aP%CWbi^WbpfUKeih#z@7O#Z(`wLBF+* z>!O~#BylZ&Sh65X=c!nhryx<*O^x?HXWhD`gYNy@-GZdw+3z<+zteM2x-yVc+W@us zxy-$+t{!q48aiLv0#IVEl8-B~Mr7N2M#3pd`}+(YHndz_ z7e|^Qb8dOg7^Tg{i6&%{oAGGMH2w#s5(I&YnTJSQs78=8+71QtoN`rs6tu*e8lcpA zvC7ST_R`N(DFe$qS3$epAZs?5*=VzQ3PKRqg9Zi94xGtcL|lOT3G9quSXpXk+?s~ zFFViUIDc{G8H|n=Hb28vyVUc2TkHid1Mz;RzF+cBl<%(SiFLwQ^SZ;E6GlvV+3YVO z-IEi=i@$6Qhr2GS3?Ws`<9~C@zn9e~SYkL1ew4e%KFyZ=cScoa<$X>4qYZ vWtO|Y9=;9#ALciorRjDO|8lJ3ogKh*F)8LYLOntFX8|D2Iop(5`6vAep@d<` literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html new file mode 100644 index 0000000000..5d34eba8fe --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + +
    inc_advance cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_d cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_h cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_w cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(SharedStorage &storage) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance) cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    initialize() cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance) cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w) cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params inline
    pointer cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_d cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_h cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_w cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html new file mode 100644 index 0000000000..3da80d41b2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.html @@ -0,0 +1,350 @@ + + + + + + + +Cutlass: cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params Struct Reference
    +
    +
    + +

    Parameters. +

    + +

    #include <tile_iterator.h>

    +
    +Inheritance diagram for cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params:
    +
    +
    + + +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (SharedStorage &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     Initializes params to default values. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Scalar * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize() [1/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (SharedStorage & storage)
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [2/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar * ptr,
    Index stride_d,
    Index stride_h,
    Index stride_w 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [3/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize (Scalar * ptr,
    Index _stride_d,
    Index _stride_h,
    Index _stride_w,
    Index _inc_d,
    Index _inc_h,
    Index _inc_w,
    Index _inc_advance 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize() [4/4]

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ pointer

    + +
    +
    +
    +template<typename Traits_ , typename Scalar_ , IteratorAdvance::Kind Advance_ = IteratorAdvance::kH, MemorySpace::Kind MemorySpace = MemorySpace::kGeneric, typename Index_ = int, typename FragmentElement_ = Scalar_, IteratorFragment::Kind IteratorFragment_ = IteratorFragment::kScalar, typename Skew_ = Shape<0, 0, 0, 0>>
    + + + + +
    Scalar* cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params::pointer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png b/docs/generated-html/structcutlass_1_1TileStoreIterator_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..aabb9a31b62d0c368676729782cd1d8ca3f55c4f GIT binary patch literal 2388 zcmb7G2{hDeAO71J8kIGSNp}bpvP>FD87_ajC~gRem}G0l5{5x064M}CWg8}#nQ`x;rlse8`1ob%mtzVAKfec#{nyuWjv-+Rvcd(InUVPWpQbk8NUZ4Rq|@oPF9NTyATRcknH9iF z`Z0Q2tW{amB&c`MI*@Jt>8#&KB{WLs)$c2N2_e^Kb;CI|x{J<8!UN*swWU7k0ta?n z9r}B1`*9k5IEOH#t?WAz0!_xNn(%K2ip&=?18Pyp+3*_YAngBvKKw zb4Y1XX7=pHw1t2D{M?AwGu0Dh>uh9jZrfDy8D?q0sL9rDvrc{_LRc zb@5%3X;6e!-kWeUPf>3+bZeR zH)QfT#myc%LY9v})vFm0N0;Zhp+Xa|yz4`F5%`b?1)W!>U0CP$%&hlP$?M2t2SFpv zt+G=5)?-Q(>HXaCC!f(E7;k2d(XNV?H|qI=DBO^00a`YkGd%O)enc;=m9}7HwDNqy zoFkDERbJ!lo`Q7`RYBT8WH!&Ayn&*4VR(u=Tjni#vO5Nwddk%jm$Pq{j~gpR@j2XH zWeR|Vyx=P1DI^4yRMHUwhlxm2L(+;0?y!r_2yKNbn%*2wjp9euC~YRv5e1CeaH0~- zt2rh;qu7(0wI2a}^WsijFZTWs?H#yesaIV6Cbcj5Y?q8(y3nz6mGeCZA5jpeN05Fw zeyt{NJr~Q2qF%~po4y-Sv=VHrj$J1S%cjE}y9{hSiRZTz`M6*UB0C;VWOb);xJwb4 zjDSS>i|4j6Yp7^(kKKsr3Uu-~Ccv%TtwO^WB-Y|_zOkTPTnX9d3%B&^NCfUaR=&?R z(06(BmY~>%8=trkRUd5nED)11lAQF}vOberP60Hwzgje$K#}*=ilY_w`q{T)HB(Wv zQUVbMa}!$DfN_6X_b#}$6ai%bHE3dKSF@anj1gufr%4U9R#15EM&eYyXg)*8H=V&B<%iyO6dPHIU#_8G0o-ar5onLb=GB79Z zJbqG|o9}X&s;Au_TK3eGrN8h<}}g z;0p3QreKfJQQMvQCRTI#5~)Nru(cz_KpC_Ou|s*f+=esBSRQW$5VBGcQ{-aOzI_cUfbKYA=hJq8h~vDZQW+h+djKy$j3Hj62m zt(KUrrs+rk-1M*qjsSN!+funR3J?dHN*up(u+ZdR$Qzj|$jUJ%G65$*93BOz00QBe z2YT4(fQktZz(H4m0RjL6`-A^K$#6L;OUNk|%2NL+7X4G>eZ>vst@D-9!~NfyB^OPj(X&+lBeE9= zAD00u;qUB~|Fp3;`k^OO!lPRE*yWeo*6pzJ!4s<`g`C=XjFTq-k$52d=$V#~Ye9+7 zjG=Z)za_hHpdnAV0q^DSHcViN_f@%MC?9D_emwh##0Ea|c5ax?@QJ50ut?*g(O=tX zFA_3)zHT=9K-32%i&Z$fvT^H&7Qbhu?FJ52ID}ZK<966{^iWSQ2GQwLqkgyQTCh_e zSNGC(d+I5F68dBcf>qAuw9i<6MOVewj&hMlPR-a6*g)MZg+t27rnJJc36jWARq!BY+Z2z_-+O<29%g2iPP=#N>LNrH!fzOXY%~Js*6UT zr=Gb;DPJHe8Q{*(!qmr}K_e@vD_&1KNT)j%z9`=o#y-D>q#41%njsDU%1>xSyq znddRrBolou5}8a^`Pc!mrLKI&yr!k#c)5Jyo(9a{YRbQmNrLqiytPS~*R)@3k(wrX 
zck&&Q9{J|61VO~eym};*Z0FU04o+c)s-a#gq^+v0TND-A9y)5Iu2Y+pP&xn&UAuf3 zf~rxR2_~=!XxdQ{L=4ID2Zx;FRP$EJQ0J_5{+waxdabyelX=hfaCV8K!;|aXqLYOP z`zk{7JkTZ6oo(wf@4DO>EUmQAepp2=?jFL$FZxf@1$J;jbiJ4!jyUC8P+S2&YBU1P zoyEN33=WNm z#+Thx&cSBBPyr@-Usd%5m4UMyhX@#ww+q#E$RDL)$0R$qn?;9y89fs;VqUe#_?=JE zJ^034kyXj&rw#J}h&o@}s5jiLMai9jc({5b@#Ir}HN8Vg<&9!7P%t+3eZTMVUk8K4 zgVnE4^3ix>vjNLarJO6|1Rt}UgSx4WGsZwXF@8T0Y}hglt`{}OU>Wr62g+n}%HMCQc4 zlNJ!dfeqTT^4O1+W83uGNhYNIZR`7X{d;$-@9~JYWSy1P7WHC%fdEk7Opm7P6!8xq CU0MPF literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1TileTraits-members.html b/docs/generated-html/structcutlass_1_1TileTraits-members.html new file mode 100644 index 0000000000..c4d8ddf526 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraits-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraits.html b/docs/generated-html/structcutlass_1_1TileTraits.html new file mode 100644 index 0000000000..b81a519418 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraits.html @@ -0,0 +1,179 @@ + + + + + + + +Cutlass: cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ > Struct Template Reference
    +
    +
    + +

    A template defining Tile Traits Concept. +

    + +

    #include <tile_iterator.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of the tile. More...
     
    typedef Delta_ Delta
     Number of steps between accesses along each dimension. More...
     
    typedef Iterations_ Iterations
     Number of accesses performed. More...
     
    typedef ThreadOffset_ ThreadOffset
     Functor that returns the logical coordinate of each entity's initial offset in the tile. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Delta_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Iterations_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef ThreadOffset_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , typename Delta_ , typename Iterations_ , typename ThreadOffset_ >
    + + + + +
    typedef Tile_ cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >::Tile
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html new file mode 100644 index 0000000000..e33d565589 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsContiguousMajor< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html new file mode 100644 index 0000000000..777f6136a6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsContiguousMajor.html @@ -0,0 +1,232 @@ + + + + + + + +Cutlass: cutlass::TileTraitsContiguousMajor< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsContiguousMajor< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    #include <tile_traits_standard.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, 1, kThreads > ThreadShape
     Thread shape. More...
     
    typedef Shape< 1, 1, kThreads > Delta
     Delta between each thread's access. More...
     
    typedef Shape< 1, Tile::kH, Tile::kW/kThreads > Iterations
     Number of iterations. More...
     
    typedef TiledThreadOffset< ThreadShape > ThreadOffset
     Computes the initial offset. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    +

    Detailed Description

    +

    template<typename Tile_, int Threads>
    +struct cutlass::TileTraitsContiguousMajor< Tile_, Threads >

    + +

    Tiling in which the number of threads is fewer than the tile size in the contiguous dimension.

    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, 1, kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH, Tile::kW / kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef TiledThreadOffset<ThreadShape> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::ThreadOffset
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, 1, kThreads> cutlass::TileTraitsContiguousMajor< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsContiguousMajor< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsContiguousMajor< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html b/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html new file mode 100644 index 0000000000..4732a54bcb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStandard-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsStandard< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStandard.html b/docs/generated-html/structcutlass_1_1TileTraitsStandard.html new file mode 100644 index 0000000000..7806ece42f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStandard.html @@ -0,0 +1,208 @@ + + + + + + + +Cutlass: cutlass::TileTraitsStandard< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsStandard< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    Chooses 'best' shape to enable warp raking along contiguous dimension if possible. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    static int const kWarpSize = 32
     Hard-coded warp size. More...
     
    static int const kWarpCount = kThreads / kWarpSize
     Number of participating warps. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsStandard< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpCount

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kWarpCount = kThreads / kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStandard< Tile_, Threads >::kWarpSize = 32
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html new file mode 100644 index 0000000000..2ae5190886 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsStrideMajor< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html new file mode 100644 index 0000000000..d24bc59b03 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsStrideMajor.html @@ -0,0 +1,232 @@ + + + + + + + +Cutlass: cutlass::TileTraitsStrideMajor< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsStrideMajor< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    #include <tile_traits_standard.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, kThreads/Tile::kW, Tile::kW, 1 > ThreadShape
     Shape of threads. More...
     
    typedef Shape< 1, ThreadShape::kH, 1, 1 > Delta
     Delta along each dimension. More...
     
    typedef Shape< 1, Tile::kH/ThreadShape::kH, 1, 1 > Iterations
     Number of iterations. More...
     
    typedef TiledThreadOffset< ThreadShape > ThreadOffset
     Computes the initial offset. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    +

    Detailed Description

    +

    template<typename Tile_, int Threads>
    +struct cutlass::TileTraitsStrideMajor< Tile_, Threads >

    + +

    Tiling in which the number of threads is greater than the contiguous dimension of the tile.

    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, ThreadShape::kH, 1, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef TiledThreadOffset<ThreadShape> cutlass::TileTraitsStrideMajor< Tile_, Threads >::ThreadOffset
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> cutlass::TileTraitsStrideMajor< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsStrideMajor< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsStrideMajor< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html new file mode 100644 index 0000000000..e76c228b18 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake-members.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html new file mode 100644 index 0000000000..771a8e410e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake.html @@ -0,0 +1,326 @@ + + + + + + + +Cutlass: cutlass::TileTraitsWarpRake< Tile_, Threads > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads > Struct Template Reference
    +
    +
    + +

    Tiling in which warps rake across the contiguous dimension. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + +

    +Public Types

    typedef Tile_ Tile
     Shape of tile. More...
     
    typedef Shape< 1, kWarpsStrided, kWarpsContiguous *kWarpSize > ThreadShape
     Arrangement of threads. More...
     
    typedef Shape< 1, kWarpsStrided, kWarpSize > Delta
     The same warp rakes along the contiguous dimension. More...
     
    typedef Shape< 1, Tile::kH/Delta::kH, Tile::kW/ThreadShape::kW > Iterations
     Number of iterations. More...
     
    + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kThreads = Threads
     Number of participating threads. More...
     
    static int const kWarpSize = 32
     Hard-coded warp size. More...
     
    static int const kWarpCount = kThreads / kWarpSize
     Number of participating warps. More...
     
    static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH)
     Warps strip-mined across strided dimension. More...
     
    static int const kWarpsContiguous = kWarpCount / kWarpsStrided
     Warps strip-mined across contiguous dimension. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kWarpsStrided, kWarpSize> cutlass::TileTraitsWarpRake< Tile_, Threads >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> cutlass::TileTraitsWarpRake< Tile_, Threads >::Iterations
    +
    + +
    +
    + +

    ◆ ThreadShape

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Shape<1, kWarpsStrided, kWarpsContiguous * kWarpSize> cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadShape
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + +
    typedef Tile_ cutlass::TileTraitsWarpRake< Tile_, Threads >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpCount

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpCount = kThreads / kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpsContiguous

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpsContiguous = kWarpCount / kWarpsStrided
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpSize = 32
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpsStrided

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + +
    int const cutlass::TileTraitsWarpRake< Tile_, Threads >::kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..e816dc744b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset, including all inherited members.

    + + +
    operator()() const cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html new file mode 100644 index 0000000000..cc0d9db2c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TileTraitsWarpRake_1_1ThreadOffset.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     Basic thread offset function computed from a thread shape. More...
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Tile_ , int Threads>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html b/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html new file mode 100644 index 0000000000..bc28ca5dd8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TiledThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TiledThreadOffset< ThreadShape > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TiledThreadOffset< ThreadShape >, including all inherited members.

    + + +
    operator()() const cutlass::TiledThreadOffset< ThreadShape > inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TiledThreadOffset.html b/docs/generated-html/structcutlass_1_1TiledThreadOffset.html new file mode 100644 index 0000000000..dbbccc1f22 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TiledThreadOffset.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::TiledThreadOffset< ThreadShape > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TiledThreadOffset< ThreadShape > Struct Template Reference
    +
    +
    + +

    Basic thread offset function computed from a thread shape. +

    + +

    #include <tile_traits_standard.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     Computes the logical coordinate from thread shape. More...
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename ThreadShape >
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::TiledThreadOffset< ThreadShape >::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html new file mode 100644 index 0000000000..1bb156f6fc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::TrivialPredicateTileAdapter Member List
    +
    +
    + +

    This is the complete list of members for cutlass::TrivialPredicateTileAdapter, including all inherited members.

    + + + +
    at(int, int, int, int) const cutlass::TrivialPredicateTileAdapter inline
    TrivialPredicateTileAdapter() cutlass::TrivialPredicateTileAdapter inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html new file mode 100644 index 0000000000..f93bb89cf5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1TrivialPredicateTileAdapter.html @@ -0,0 +1,183 @@ + + + + + + + +Cutlass: cutlass::TrivialPredicateTileAdapter Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::TrivialPredicateTileAdapter Struct Reference
    +
    +
    + +

    Always returns true predicate. +

    + +

    #include <predicate_vector.h>

    + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE TrivialPredicateTileAdapter ()
     Ctor. More...
     
    CUTLASS_HOST_DEVICE bool at (int, int, int, int) const
     The value at location (d, h, w, c). More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ TrivialPredicateTileAdapter()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::TrivialPredicateTileAdapter::TrivialPredicateTileAdapter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ at()

    + +
    +
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE bool cutlass::TrivialPredicateTileAdapter::at (int ,
    int ,
    int ,
    int  
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits-members.html b/docs/generated-html/structcutlass_1_1VectorTraits-members.html new file mode 100644 index 0000000000..011de7e914 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::VectorTraits< T >, including all inherited members.

    + + + + + +
    IsVector cutlass::VectorTraits< T > static
    kLanes cutlass::VectorTraits< T > static
    Scalar typedef cutlass::VectorTraits< T >
    Vector typedef cutlass::VectorTraits< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits.html b/docs/generated-html/structcutlass_1_1VectorTraits.html new file mode 100644 index 0000000000..80070a5b10 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< T > Struct Template Reference
    +
    +
    + +

    Traits describing properties of vectors and scalar-as-vectors. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, 1 > Vector
     Type that is always a vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = 1
     Number of lanes of vector. More...
     
    static bool const IsVector = false
     True if the type is actually a cutlass::Vector, otherwise false. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::VectorTraits< T >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef Vector<T, 1> cutlass::VectorTraits< T >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< T >::IsVector = false
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< T >::kLanes = 1
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html new file mode 100644 index 0000000000..e5e2d78019 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< Vector< T, Lanes > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html new file mode 100644 index 0000000000..39561291cf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01_4.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< Vector< T, Lanes > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< Vector< T, Lanes > > Struct Template Reference
    +
    +
    + +

    Partial specialization for actual cutlass::Vector. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, Lanes > Vector
     Type that is always a Vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = Lanes
     Number of lanes of vector. More...
     
    static bool const IsVector = true
     Type is actually a cutlass::Vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef T cutlass::VectorTraits< Vector< T, Lanes > >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef Vector<T, Lanes> cutlass::VectorTraits< Vector< T, Lanes > >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< Vector< T, Lanes > >::IsVector = true
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< Vector< T, Lanes > >::kLanes = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html new file mode 100644 index 0000000000..a038a43122 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4-members.html @@ -0,0 +1,94 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::VectorTraits< Vector< T, Lanes > const > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html new file mode 100644 index 0000000000..7f9a574310 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1VectorTraits_3_01Vector_3_01T_00_01Lanes_01_4_01const_01_4.html @@ -0,0 +1,200 @@ + + + + + + + +Cutlass: cutlass::VectorTraits< Vector< T, Lanes > const > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::VectorTraits< Vector< T, Lanes > const > Struct Template Reference
    +
    +
    + +

    Partial specialization for actual cutlass::Vector. +

    + +

    #include <vector.h>

    + + + + + + + + +

    +Public Types

    typedef T Scalar
     Scalar type. More...
     
    typedef Vector< T, Lanes > Vector
     Type that is always a Vector. More...
     
    + + + + + + + +

    +Static Public Attributes

    static int const kLanes = Lanes
     Number of lanes of vector. More...
     
    static bool const IsVector = true
     Type is actually a cutlass::Vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef T cutlass::VectorTraits< Vector< T, Lanes > const >::Scalar
    +
    + +
    +
    + +

    ◆ Vector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + +
    typedef Vector<T, Lanes> cutlass::VectorTraits< Vector< T, Lanes > const >::Vector
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ IsVector

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    bool const cutlass::VectorTraits< Vector< T, Lanes > const >::IsVector = true
    +
    +static
    +
    + +
    +
    + +

    ◆ kLanes

    + +
    +
    +
    +template<typename T , int Lanes>
    + + + + + +
    + + + + +
    int const cutlass::VectorTraits< Vector< T, Lanes > const >::kLanes = Lanes
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize-members.html b/docs/generated-html/structcutlass_1_1Vectorize-members.html new file mode 100644 index 0000000000..2f3903bd3b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vectorize< Element_, kLanes_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Vectorize< Element_, kLanes_ >, including all inherited members.

    + + +
    Type typedef cutlass::Vectorize< Element_, kLanes_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize.html b/docs/generated-html/structcutlass_1_1Vectorize.html new file mode 100644 index 0000000000..d728c0a27a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::Vectorize< Element_, kLanes_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vectorize< Element_, kLanes_ > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + +

    +Public Types

    typedef Vector< Element_, kLanes_ > Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<typename Element_, int kLanes_ = 1>
    + + + + +
    typedef Vector<Element_, kLanes_> cutlass::Vectorize< Element_, kLanes_ >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html new file mode 100644 index 0000000000..8f2e996dd4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vectorize< Element_, 1 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::Vectorize< Element_, 1 >, including all inherited members.

    + + +
    Type typedef cutlass::Vectorize< Element_, 1 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html new file mode 100644 index 0000000000..ca9f3310da --- /dev/null +++ b/docs/generated-html/structcutlass_1_1Vectorize_3_01Element___00_011_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::Vectorize< Element_, 1 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vectorize< Element_, 1 > Struct Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + +

    +Public Types

    typedef Element_ Type
     
    +

    Member Typedef Documentation

    + +

    ◆ Type

    + +
    +
    +
    +template<typename Element_ >
    + + + + +
    typedef Element_ cutlass::Vectorize< Element_, 1 >::Type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1divide__assert-members.html b/docs/generated-html/structcutlass_1_1divide__assert-members.html new file mode 100644 index 0000000000..59e5af796f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1divide__assert-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::divide_assert< Dividend, Divisor > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::divide_assert< Dividend, Divisor >, including all inherited members.

    + + +
    value enum value cutlass::divide_assert< Dividend, Divisor >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1divide__assert.html b/docs/generated-html/structcutlass_1_1divide__assert.html new file mode 100644 index 0000000000..f7dd669007 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1divide__assert.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::divide_assert< Dividend, Divisor > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::divide_assert< Dividend, Divisor > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = Dividend / Divisor }
     
    +

    Detailed Description

    +

    template<int Dividend, int Divisor>
    +struct cutlass::divide_assert< Dividend, Divisor >

    + +

    For performing a constant-division with a compile-time assertion that the Divisor evenly-divides the Dividend.

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int Dividend, int Divisor>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html new file mode 100644 index 0000000000..c3f2e3e929 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >, including all inherited members.

    + + + +
    clear(Fragment_ &fragment) cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > inline
    ClearAccumulators(SharedStorage &shared_storage) cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html new file mode 100644 index 0000000000..e815e57d6f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ > Struct Template Reference
    +
    +
    + +

    #include <clear_accumulators.h>

    + + + + + +

    +Classes

    struct  SharedStorage
     The shared storage. More...
     
    + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ClearAccumulators (SharedStorage &shared_storage)
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void clear (Fragment_ &fragment)
     Clear the fragment. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ ClearAccumulators()

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_ = 1>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::ClearAccumulators (SharedStorage & shared_storage)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ clear()

    + +
    +
    +
    +template<typename Scalar_ , int kLanes_ = 1>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::clear (Fragment_ & fragment)
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html new file mode 100644 index 0000000000..b97be88f35 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ClearAccumulators_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ClearAccumulators< Scalar_, kLanes_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared storage. +

    + +

    #include <clear_accumulators.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html new file mode 100644 index 0000000000..256b383d18 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html new file mode 100644 index 0000000000..7ac0411289 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <dgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::DgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    typedef double ScalarA
     The scalar for A. More...
     
    typedef double ScalarB
     The scalar for B. More...
     
    typedef double ScalarC
     The scalar for C. More...
     
    typedef double ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< double, double, double, double, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, double, double, double >, kScalarsPerLdgA_, kScalarsPerLdgA_, 2, kScalarsPerLdgB_, kScalarsPerLdgB_, 2, 1, 2, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmConfig.png new file mode 100644 index 0000000000000000000000000000000000000000..0769b899383659943236dd21acd6a0b3aa2a6779 GIT binary patch literal 2957 zcmdT_c|6ox8z0vVp=ePlmkJq6qjb9}%OKGpTgGneTuaEl&Ln9gW5yPfEko83k)ko9 zo29W`YD~t|f>xgtf+U{gB*}g8?7MO~J zTbo}60ibV3CNRlBeKjHa^K5jz%m^^W08NCG}K! zX@#jDI9NjbDtrWXSkkuZcGI19HsahLGO{!G5;KnPgtSt{c<{nqG{+L6RaiAdu zv1LiqGM_ha&e=ZwqVDhSv%lDXa&Baou#YfES6*D_BDW%)6p%s2iX;+Jy)~ThjNsBa zS!vH=MeaKskZ2oCA$=8vXfEl` zC&5CpbNW)+7neE@_y;bnPkSM-_c3gW2En5eubr0^jo?Bn1*&kmw?=~oa)0@t;;>4# zzwJ6-Pdr*6^*U`g>4WFSdU>(LkycBdwMZxuHius1jo{jIDz&vD8!s_5Q)sqpuZtd0 zol$Z=2rx?XqXGBM-iFaPYpt`N$5c9!6=*GX_apKR;2Ry)eOhe4CkYNAGw?ok*i@PAK&VDiX>do5K4=g*_^+!m# z<}yp(@ht0d$365G>6`H3x3J0%w>Dd;-Sa{o8&=bTIOtU*tm3g_Q1#}usz`65y zLd!Qx5Wh=>(ks4}&4Uw|QSXZKM-HBNk_iopTUej$$_yyzHpLzm#h@>};8jvy$S>y$ z=hN92FQ^FFKI*d{)YaWFV;i{R4POZq&eIxcLNO|(Nbdp>cwP{{< z<@B&|D;{Q4n!!e#FDW^uoQ6Jwvg9gO7+0ky-A&8l`&chfU#wCi7gSM8H7fdn6&Oxb zgub;*u}4y#t%}3WEj?7+K4?YB9JzjCd2Ra5?o_D?*EP)-nfp63E=DH|Duo7p)x(Oe zjs?411@m>*dB+I1Vq2#0?^7RsB8(0c6FXlf(=ROSaQ=%T=ZTmZ@z1Uiri{b+1cT?r zsZD43Gho!+n#!#{h@vuDhoO$cYb!H3SesP+eblpokh+g>$$mfYucv>ZiUl&Y^XZ92 zc5u7h3bVT)ii|IxBoT$n8ClTq-m*=oL@=S&BxK{2E9jSZS=@F&Z*nJLKR;g zJXj~Ue%-OL-IZSMiz6)_TFvHza%v?$ZporHoUCO~Wt$8sd077hni&6JvCxqf>&yRZ z1odpfy{2{_FJt9gPxoten3fHbEu8?aIHcs#3b{p7O8AZGD$18r;VWvxVl`^N9EY(d zUAW0c8F=YjVuZr%+gBeq6UZ1;f=DpztHi(1=-bPKo&Uz2(yFAt33#x_o~!tchqtNK zr-P!t&BNaa_}4Q~3Q4Wo<3C8;+Z-f*Lty>Mx&#??2qfqR5+~#BxdMRh6iCR;gSXK? 
zC;=$HO_w|v!hjOM-MGiygEWP1W5^)0lq|XLo2NUPf1m-1|AEe};|PAY4V>_YZQw!J zHU<(rb;cG665RgFLuu6?W_wcqHVa(k|Jf{GR=L#6+T<-h6AM#T%&;~&QJQ%o`RDI$ z6>>BA?>up8A9RDeaq>zI^oHlxY-qpHQq|4}!~H$h*V1Ko5jIL!Q~ut+;qEu~EqW&6zy# ziEIig93wt-Ejhe;vlnlig|bZcA28Y(R+d9r^<&=j+XuRjQGZD(5}6{$WL()2>Y9py z*j+p-9sngr0U5m>)J!4C&f{XUC^(DooBraNC@-WDS1f;BkiouhQGoy+iRtu4f@F7S zuPwK!a?EZ7&U>uqZW3}jT-TE4u3W&-2o{pO-6%JK1F9lkQ?NIMLxs=S%6C$r8qku> z-rl7mj$p(7V+B^cX(uUDwRe}fEwSQnu4`EzOYCcmpNUudBRFI@x?us>Y>T|XJ|Xj^ zbpkG$&~$u2T7Qo%&SeR#Nkm0qx*P`_F5J?wB~1vw(B zktJFT?YrUdT>X;;v-ON3#R62v9@+QyV%DbbTEQ-rsJkH z-^mzOTwnzrMf=aPe6#dQJ(AxW4ycUfz{@$kiZADeM}HkEQ}cG^mQ*>x z-{%Xinr^D7Ar`IquShX6s3$Q?ZbELV?Ihpj&ZCE0=}G8$nv(F46K@wpK%AWO&B=FO z1g;4>@ruq%cD^0 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html new file mode 100644 index 0000000000..2832466c10 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <dgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::DgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ > +cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef GemmEpilogue< GemmEpilogueTraits_ > Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1DgemmTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..151b3c5ab08b7f2e1711f5e93244f8c69d12d45e GIT binary patch literal 8483 zcmeHNdstIfw%4(iYO7SaEn0=xIxQ^%sZyoNBW;z^s+jsfiiSjsib6|jAcVZ@SnD`Z zqN9*vgjBs6Nq{H}hLFcm5u-$ih!GHyfH4FTNFX8aURFBGg>vwlM0OAkfWvSHDL3G-?{UGd?DqKPAF z1Fq+Ti~HL51*egq6xsCJ73((OzMt*Qs|*c!jjQ_*!dDoU8i*D7|9ImL{(57oYQwWG zzkN;iv!AA_FqVRU_wZ`n=1R|-au&c#r${NAb@o#26i3b>I3V4v)g?O3UeLto0jS>Q zBUEUV;-7!ifB7xJad)P=h}iGA%M)vAT-mSIlVYHHB6+qhzoXCsLbs~a-D7F6@i(|h z$L}K+nd^^L#V8mzGGpK@mO{1N{UXlYHZ{70)v`)&to5ynq`&(;74V{LEQ zDd)hzj$K+~imF;021=_p+KY*UHQNqB>vw9ak)4Qz8q}NYuIR_1SK=-*vjqk(=#iL|m6Md5o>}zV<>cTcOum zac;OmAG#bB2@e>NmQH)K1X22zc=kgB-Ymdppo?v4k%)6E4K@~W1rVutXxryQ>Rt+d z5j;}X-dDbg)BAAcxy=2%%^c!R@W=}HNV(MMswDN7PvzQDnsxHm?PuyRNp$b{h!Bod z3J{SP6y$Iln;3f}n=EouCychi$%z+CqiLb}JWoUk`H?>CoJZAtZJTKs0#}P#m6v)Q8lQE@%?K^{0m1vK&I16v9 zqwaM}zD0ioKdDXlLCaw)qJR#U9_?!C*Y@V52}GYOdhj&OhoOo>)low(V!%VRQN^Io zgNsZOA}VOa_C0Ka;wwX5xT14!5shff$7g`6Px%~e=o25pr&oold8Fnc$)+3pS7d(N zoqSOPSY62vPDfUCni5BJ-b0CKQ5_7uYy9H1)_LaC=KQ40kB0|El7m*od~)9S5_A43 zdZ90UDcSaew>}Rw>|uZ?kTgfH?@`b13yv}}ERd8aK42T#MRrC(r|SxjOoo&3P#bF( zl;ag77Q_fah8sX%WU+Gs4nW61`$Rrm;g2q`T2|{7)#U7t(c9YF2HqT>Nt!~1z5!jm zlE3QVbG)cm!4sBs{0bpF>wW_lk*+R>mudL z5|$>-9ByvxRVSo%(!S)9%bXj}V`g`=*Z`=;*cegJO_w&|D6M*q)06L=E37g9N&?=pogoYk;N)g=hwE;66=mVU0PKk(U#H$G z?aufS_yD{FyZ*wmnoP;amk&3dbxW|OJ!oJ+b6YCSM;zV935FaS`7tf>jYG*|X5Bc> z$a#(0u4bUs-HSNgu+E;bwwi{zOP#5}_g6!QNJJ-hqbXvho>1txzsoxC2XwNyY=Eoe z+U!ZxT)lf@LF}hU@`H11+-&OzDKudA2iiRH1(;^4BgBM@prThb^}OUib3{6`Z;2%- z4m8CM4b64y^Aa#p%{S@6Kd^hS5#XWqLxIg!t${49Jy|C>6&HK5AVhp_oD-Eem8Y;; z-JdQ`1ikappNyXnu9U2rx1%j7C57nb&i1-xsvEmBFI%1_{1Ne2M zc2%>fO44$wyftwC$7gy<-toA4dt0sXS>U!;X-jFjaYC9Q;+U^3Fzv28 z36TAtk-Qr`hZrRP`);hVN3f=c{?%Qy#%A4vbN&$+*5r-=tGVp6c`&MzQ%YzXJDs;O zMY;VatRn}kKbl$-qrk4wv#QmtEssqtpX6DdZ`)L=%}^+}^&6T1sfx$2-i=ga$8qyD 
zm2|4x3#H!HZXQ<-GfhWiT^>S&y}q8N?@&b}>NdfU$Fx1&{9tFiXVKO?$K?(~hx=5l zAUr2M7YBLp3}&=+t)+zOlduKZ-qtNn*c+Xb_%$u+I^Q`3E}}(HJe-r#dy!P%$C(9{ zpYTVk8z?mic7p~@-u-swb7J-b`j)krC@Y<)q2cd=@pXmRolLc4sJ;BZ8wWMadCu@$ zXT=YQJ4zoqB%rNGKgbpRl!;i`04c4&D+*&xg7i*U%xxx;)VtF!P*d4#PM4Vdi_6=j^% zV_Ho&V+wgY9RsM%dl>yKgPb|-t1f*OW>Tvnu7ec%&^21%55~K_K*PQdKk38jJ0xo4GG5+5OG=!#ogQi|~(Uy$!Ee3G0ESGjQBV zp=-?rX8F4@Z;j!W0m#wq0PTzVq@!=ciw|Qg!y~1{_V!TICj0S@410*Cc(Wa0Fz;H2 z9|2-(p(AczeI8}htX+DSptSN37S=s)S1Z(4mFBP2<5Xb^Jx{qgCfy$wRl+xa9+^(n ztP>f5>?xUe`OHS6t@KtJw3uUYC??A>c!IgWlS}!6l*J-Zuz%ydf)pe*Ek0hX+emS74 zBJi3tM3?Zv+<-1;)Jc94c1osQYYizE5fsj{O3!xS9Ut60qLqBY>48C;nLZwpl(?1$ z8cwwFl%56GHfA1U5{TJE71E^)?x$zNz>g51Zn^8lI-xN;ip8g0!HJ9=rHVBA)quK) zfSpNSRif-c(I*3s0ycI$v~d@K+vjY(37d|w%cApDO0RgP{@+e7pGkdx54rv4xc5Wq z4o#T#SX>?9Tb`0W*yw~wPYE5^zdz{lxSv~>DKQ}-kXrm&wB~*j?4_r~rw^Bbmw)}` zOP4T~iO)YJ;Kbl}pOVqwqHp#*CG<;^p3?P_Z_@9!4*KS|Yzz7=%MgA4S?ce^%)ipB zqug=>v5i<^IPs4+;G`32>lZ)sqDwn6p8a&ay)(Z2nPUUf{)>kvvL0rkuLQDC%Vi$l zItx{JLBqfHhk4Ds$UVI=LX>xvrn>W@_m%$-FGQ&h=B_;BksYT;Ho z!KI8savA^@JF?DqE<_`%GQ3N4(Uu*4=UVehE1&=-4{odSgK+<0_XawvyQGhc6h88h9r zaEq#5`J6lHFfZ4_G_E-WYw~Jt3HC`(xZ-1JV>-@ssJ}VDfOKa?YPukm_(bubb*SnI zvms}(r9x|_EH#QyD1@v;5zOqA$t34)x95v?4UF}qG0`iawT9XkS+8?m13mT&SJbb| zDC9hUPEJzzgHvQxK9RnGkRv&9!pNf}3@-?wC+>%HEy3T?v11iM1h1c0#_9k)PlCh_ z-TCzr3_`e!C5OrABp+=(|8%@wbiX_@ERmJ2k!hSCkDJ2P7k$<33T9{`sZPaS3$8v4 zPXu;?JFaQwsyZ^U4nboS>;-Sv>EJcK)^Y!yQA-rsj;i5W=O@bOPZm!62>des-}mIK z5t8cMBMi$KokA-VVp-vQ)Ey7aA4na-oaYn|T`=87@%FTHjK0D9>*<0<_Z=G-%ffTm z;#nhj0|NGdXJiafb2lL4)I(kQS%H#8QI|M`?sr6AwU7H957(h8QNS?M+~)t=0~IYg z3rmn*9}a}!Gh#jp$FE;Tis{^10Z7CAqWJVN8ij7W%n~TG8qU$F2F8Sdwt~arg_=1k z=?&`G#>^7te(G2EF;Ptc)!^}XGCbx z$}n!LwHk#M=t``1tHRWY$o$qlV?2c~_?S)+Rr$JZSa!z**;_klsFw`#5yJC6-942Y zIhk06pQBrTnl#n%-a=M>fgsni-+%XH&jFxpjQ_GK**)pu=_K6k-?1c|Ekj>?^1#FB zzgW*%V{9=G=@p52w9~s!4dO~P9is0?gR)tX}*2=LSE9K_aO997g^}@E!vprPVJ|^G-}&Cpm}n*9a8_| zkW_lk>)z)jSt77}PbO_uD5%r9hGx_GO~7m8h*xEg!^}f$8|!|tzfbu5CzAF5P%Ql4 
z+1x)~CVf<8bqD`UcMv1Ln|e6OMZd+908LZVZ!x8jXV9`|usHjFh}BbU_4)tM)hF@> z`n}zA<#Xk`Z~hYv;?l&Y(vV4XsZN3T4X!^@42Pad7Tz5d#>;$7Pv;w2v&$O%8AilB6%mGNG>GBEykEG1qj#6_UgZIrdU1m@?s?C3KFIbw{}Zr`@FIA?2DSg z$td^LMznouje1Dl{OA5x2sKGy0bv{LD_`w>)fr*o3rr>`{MIOj-Ih-G9^nYJb*Nv#OD7c&L8-!Wd?{)i^H?E;*N<)^ERt zMiaSNSTv3BnPBx4$tcQZ-$fyJjovQjhpxgQ-l%Dj@&r}ayyCvNIQPB(LP6tu&UkL+ z(5q~m7raAZ?gPGkkFqHaVc7MkkNJ8o2H9uTwv`#g6pHTMbThIT%~|3;!V>qj3L?6i(JjUJ-#CK4!H!>C@v5o14BXNcY`Knn0DlAvpG2JU7$Opj zX@94-QtIz}Sd%TtOpS@q4uxnvX(QX=%IC zK^1&?E%-g!g||YZrP3F{6-TkdWsIG0;p~`}4kY$Wcl_pl))iV&ii`s&tF@b_Jj$e9 zhp|CRA1$Av(arGM15`~vw%@{uQT7=&O((Q$zs@HGny*5HCyNai0lH*RAsr3HrmMlS zc>#zv>`qKns--kCNi;YkMj?FOe6!7jWhORu+zS*(rA1g3<^Yjs(BU!(4{$SrdmGJR zF^Z14h-3-+c<^HIi}zq%lj)~!#5ry*V2{^_6%w+0m48d$tE;8V8_p&?AiEg`9HHtl zNGHnGb)kffzpkQkl)l?g$bTi;~%`*jMMJ& z?wF4GPI_>6h;3IFvZAn^snN3c@muX*?5Zfd&prgqY7P4FXdOz-CnO7mF2y)88e6ky zq&{;m-#T_4ra6K&z)>Ra{6`*^m>LM}o9 zQZOwjz(l9I^`?Aoj1i-CzcC?35uk#H#&8AjJQ_`OA{2EDhS0Lz@e?iT(Jydt`F;bg z{*MBINBX+@Tj{J@c5ladjOJ@8cN7&<`k;}(+2zYy1?zt3YcF}AI=N}PsBUAxRrLef z<3r5v{G-JbIDHHi^Vivr+?%9-a;>uP3m8f?6PpesQlYi(3#Cov*16WowjLFrwS9iM zpGWcQQ*QC!sxWDLiTDL5Z^7aCh2}%%F9vGNlH;kU67G>OR4F?TGIqy^G;=iKIHkUs9c`s*r59s0ZR|ic zcx^QRb9BU8{XS}*56yg36X;uB+_U#FHj)P4mzCFI(13&M;gC>aj8XSgrW5qUuDpJ1IeNwmo89rJ3kC@@X)pp4-43>exI zgQ4W?@XbXMccZ!EYNk@ljOXMaFuo*9+$7*Ncj_coqAWve=oU>Q_Z$gj6!q6uD)x!$ z{)o&c;T8Hdl)5p)E7I9l=ZO25@3~tlM4PG&X$=|G{4fVvgTjM2C_y+jbe^L*!nuxl zd^m~ee~%}=4><`(p}`s8_w&QT9BR%%VF^a;7Ow7vl~Y|A^oA_w70AH$p%_3N zJ@i5n%)vB&@_4GhWpUs835CA>ltds>uwr!*FL*%3E#9hUf*7-GiP2PHiY9(Q+BjGutp@UCyQ zGrff8+-GJHuwY$C5Of~8^1zR*?i3X6h@bfrxIpJzCldQ}G4FBD#h;4-w?19u!4fF$ z{ZG~2_5kWJJ?uC5P@$Yfe>~;St*a-jSoeOQD3J9}G~h%i==S+NQ0O^FLf}6dCVZN_ LW<||MTmJMvXTd1~ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html new file mode 100644 index 0000000000..f03e26ac28 --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html new file mode 100644 index 0000000000..bde87a6e3c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd.html @@ -0,0 +1,318 @@ + + + + + + + +Cutlass: cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::FragmentMultiplyAdd< Scalar_ > Struct Template Reference
    +
    +
    + +

    #include <fragment_multiply_add.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef Scalar_ ScalarA
     The type for A. More...
     
    typedef Scalar_ ScalarB
     The type for B. More...
     
    typedef Scalar_ ScalarC
     The type for C and D. More...
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentMultiplyAdd ()
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply (Scalar_ a, Fragment_ const &b, Fragment_ &d)
     Multiply : d = a*b. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply_add (Scalar_ a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
     Multiply-add : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + +
    typedef Scalar_ cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::ScalarC
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentMultiplyAdd()

    + +
    +
    +
    +template<typename Scalar_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::FragmentMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply()

    + +
    +
    +
    +template<typename Scalar_ >
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::multiply (Scalar_ a,
    Fragment_ const & b,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename Scalar_ >
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< Scalar_ >::multiply_add (Scalar_ a,
    Fragment_ const & b,
    Fragment_ const & c,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html new file mode 100644 index 0000000000..32953dab92 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::FragmentMultiplyAdd< half > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html new file mode 100644 index 0000000000..141354c25f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1FragmentMultiplyAdd_3_01half_01_4.html @@ -0,0 +1,304 @@ + + + + + + + +Cutlass: cutlass::gemm::FragmentMultiplyAdd< half > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::FragmentMultiplyAdd< half > Struct Template Reference
    +
    +
    + +

    #include <fragment_multiply_add.h>

    + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef half ScalarA
     The type for A. More...
     
    typedef half ScalarB
     The type for B. More...
     
    typedef half ScalarC
     The type for C and D. More...
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE FragmentMultiplyAdd ()
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply (half a, Fragment_ const &b, Fragment_ &d)
     Multiply : d = a*b. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void multiply_add (half a, Fragment_ const &b, Fragment_ const &c, Fragment_ &d)
     Multiply-add : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InstructionShape

    + +
    +
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::FragmentMultiplyAdd< half >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    + + + + +
    typedef half cutlass::gemm::FragmentMultiplyAdd< half >::ScalarC
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ FragmentMultiplyAdd()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::FragmentMultiplyAdd< half >::FragmentMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< half >::multiply (half a,
    Fragment_ const & b,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::FragmentMultiplyAdd< half >::multiply_add (half a,
    Fragment_ const & b,
    Fragment_ const & c,
    Fragment_ & d 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html new file mode 100644 index 0000000000..f0424e29ac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::Gemm< GemmTraits_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html new file mode 100644 index 0000000000..c2f993efa9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm.html @@ -0,0 +1,522 @@ + + + + + + + +Cutlass: cutlass::gemm::Gemm< GemmTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::Gemm< GemmTraits_ > Struct Template Reference
    +
    +
    + +

    #include <gemm.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Gemm< GemmTraits_ > This_
     This class. More...
     
    typedef GemmTraits_ Traits
     The traits. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::ScalarA ScalarA
     The scalar for A. More...
     
    typedef Traits::ScalarB ScalarB
     The scalar for B. More...
     
    typedef Traits::Epilogue::Scalar ScalarEpilogue
     The scalar in the epilogue. More...
     
    typedef Traits::Epilogue::ScalarC ScalarC
     The scalar for C. More...
     
    typedef Traits::Epilogue::ScalarD ScalarD
     The scalar for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Gemm (Params const &params_, SharedStorage &shared_storage_)
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add ()
     Do the GEMM. More...
     
    + + + + + + + +

    +Static Public Member Functions

    static __host__ cudaError_t launch (Params const &params, cudaStream_t stream=cudaStreamDefault)
     Launch the kernel. More...
     
    static __host__ cudaError_t launch (CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)
     Launch the kernel. More...
     
    + + + + + + + +

    +Public Attributes

    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    + + + + +

    +Static Public Attributes

    static int const kThreads = Traits::GemmConfig::kThreads
     The number of threads. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::Gemm< GemmTraits_ >::Index
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::ScalarA cutlass::gemm::Gemm< GemmTraits_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::ScalarB cutlass::gemm::Gemm< GemmTraits_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::ScalarC cutlass::gemm::Gemm< GemmTraits_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::ScalarD cutlass::gemm::Gemm< GemmTraits_ >::ScalarD
    +
    + +
    +
    + +

    ◆ ScalarEpilogue

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::Epilogue::Scalar cutlass::gemm::Gemm< GemmTraits_ >::ScalarEpilogue
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Traits::SharedStorage cutlass::gemm::Gemm< GemmTraits_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef Gemm<GemmTraits_> cutlass::gemm::Gemm< GemmTraits_ >::This_
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    typedef GemmTraits_ cutlass::gemm::Gemm< GemmTraits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ Gemm()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::Gemm< GemmTraits_ >::Gemm (Params const & params_,
    SharedStorage & shared_storage_ 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ launch() [1/2]

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch (Params const & params,
    cudaStream_t stream = cudaStreamDefault 
    )
    +
    +inlinestatic
    +
    + +
    +
    + +

    ◆ launch() [2/2]

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch (CUfunction kernel,
    Params const & params,
    CUstream stream = CU_STREAM_LEGACY 
    )
    +
    +inlinestatic
    +
    + +
    +
    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::Gemm< GemmTraits_ >::multiply_add ()
    +
    +inline
    +
    +

    Define the mainloop iteration size

    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::Gemm< GemmTraits_ >::kThreads = Traits::GemmConfig::kThreads
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    Params const& cutlass::gemm::Gemm< GemmTraits_ >::params
    +
    + +
    +
    + +

    ◆ shared_storage

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + +
    SharedStorage& cutlass::gemm::Gemm< GemmTraits_ >::shared_storage
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html new file mode 100644 index 0000000000..18c258d733 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    InstructionShape typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kStagescutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kThreadscutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    kWarpSizecutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    OutputTile typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarA typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarB typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarC typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    ScalarD typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    Warps typedefcutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html new file mode 100644 index 0000000000..3bc9b65f34 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmConfig.html @@ -0,0 +1,693 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef ScalarA_ ScalarA
     The scalar for A. More...
     
    typedef ScalarB_ ScalarB
     The scalar for B. More...
     
    typedef ScalarC_ ScalarC
     The scalar for C. More...
     
    typedef ScalarD_ ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef MultiplyAdd_ MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kWarpSize = cutlass::kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerLdgA = kScalarsPerLdgA_
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA = kScalarsPerStsA_
     
    static int const kScalarsPerLdsA = kScalarsPerLdsA_
     
    static int const kScalarsPerLdgB = kScalarsPerLdgB_
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB = kScalarsPerStsB_
     
    static int const kScalarsPerLdsB = kScalarsPerLdsB_
     
    static int const kScalarsPerLdgC = kScalarsPerLdgCAndStgD_
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD = kScalarsPerLdgCAndStgD_
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD = kScalarsPerStsD_
     
    static int const kScalarsPerLdsD = kScalarsPerLdsD_
     
    static int const kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD
     
    static int const kStages = kStages_
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::Accumulators cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::AccumulatorsPerWarp cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd::InstructionShape cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef MultiplyAdd_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::OutputTile
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarA_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarB_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarC_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ScalarD_ cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::ScalarD
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + +
    typedef ShapeDiv<OutputTile, AccumulatorsPerWarp>::Shape cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccumulatorsPerLdsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kAccumulatorsPerLdsA = kScalarsPerLdsA / InstructionShape::kD
    +
    +static
    +
    + +
    +
    + +

    ◆ kAccumulatorsPerLdsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kAccumulatorsPerLdsB = kScalarsPerLdsB / InstructionShape::kD
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgA = kScalarsPerLdgA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgB = kScalarsPerLdgB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdgC

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdgC = kScalarsPerLdgCAndStgD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsA = kScalarsPerLdsA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsB = kScalarsPerLdsB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLdsD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerLdsD = kScalarsPerLdsD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStgD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStgD = kScalarsPerLdgCAndStgD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsA

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsA = kScalarsPerStsA_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsB

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsB = kScalarsPerStsB_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerStsD

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kScalarsPerStsD = kScalarsPerStsD_
    +
    +static
    +
    + +
    +
    + +

    ◆ kStages

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kStages = kStages_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarpSize

    + +
    +
    +
    +template<typename ScalarA_, typename ScalarB_, typename ScalarC_, typename ScalarD_, typename OutputTile_, typename MultiplyAdd_, int kScalarsPerLdgA_, int kScalarsPerStsA_, int kScalarsPerLdsA_, int kScalarsPerLdgB_, int kScalarsPerStsB_, int kScalarsPerLdsB_, int kScalarsPerLdgCAndStgD_, int kScalarsPerStsD_, int kScalarsPerLdsD_, int kStages_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmConfig< ScalarA_, ScalarB_, ScalarC_, ScalarD_, OutputTile_, MultiplyAdd_, kScalarsPerLdgA_, kScalarsPerStsA_, kScalarsPerLdsA_, kScalarsPerLdgB_, kScalarsPerStsB_, kScalarsPerLdsB_, kScalarsPerLdgCAndStgD_, kScalarsPerStsD_, kScalarsPerLdsD_, kStages_ >::kWarpSize = cutlass::kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html new file mode 100644 index 0000000000..5c3b045aa6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc-members.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmDesc< Scalar_, Index_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html new file mode 100644 index 0000000000..9f4c8fd0b2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmDesc.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmDesc< Scalar_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmDesc< Scalar_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index_ m
     The dimensions of the GEMM. More...
     
    Index_ n
     
    Index_ k
     
    Scalar_ alpha
     The alpha/beta scaling values. More...
     
    Scalar_ beta
     
    void const * d_a
     The source matrix A. More...
     
    Index_ lda
     The stride for A. More...
     
    void const * d_b
     The source matrix B. More...
     
    Index_ ldb
     The stride for B. More...
     
    void const * d_c
     The source matrix C. More...
     
    Index_ ldc
     The stride for C. More...
     
    void * d_d
     The destination matrix D. More...
     
    Index_ ldd
     The stride for D. More...
     
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Scalar_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Scalar_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::beta
    +
    + +
    +
    + +

    ◆ d_a

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_a
    +
    + +
    +
    + +

    ◆ d_b

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_b
    +
    + +
    +
    + +

    ◆ d_c

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void const* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_c
    +
    + +
    +
    + +

    ◆ d_d

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    void* cutlass::gemm::GemmDesc< Scalar_, Index_ >::d_d
    +
    + +
    +
    + +

    ◆ k

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::k
    +
    + +
    +
    + +

    ◆ lda

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::lda
    +
    + +
    +
    + +

    ◆ ldb

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldb
    +
    + +
    +
    + +

    ◆ ldc

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldc
    +
    + +
    +
    + +

    ◆ ldd

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::ldd
    +
    + +
    +
    + +

    ◆ m

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename Scalar_, typename Index_ = int>
    + + + + +
    Index_ cutlass::gemm::GemmDesc< Scalar_, Index_ >::n
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html new file mode 100644 index 0000000000..6024711bae --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue-members.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html new file mode 100644 index 0000000000..3f08c9cf73 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.html @@ -0,0 +1,755 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > +cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + +

    +Public Attributes

    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Accumulators cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Accumulators
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Functor cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalLoadIteratorC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalLoadIteratorC
    +
    +

    The iterator for C in global memory.

    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalTransformerC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::GlobalTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Iterations cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::OutputTile cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Params

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Params cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Params
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Scalar
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GlobalLoadIteratorC::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GlobalStoreIteratorD::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedLoadIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Copy<typename SharedLoadIteratorD::Fragment> cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedLoadTransformerD
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStorage cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef Traits::SharedStoreTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreTransformerD
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GemmEpilogueTraits_ cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GemmEpilogue (Params const & params_,
    SharedStorage & shared_storage_,
    Index m_,
    Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ epilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::epilogue (Coord< 3 > const & block,
    Accumulators & accumulators 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ epilogue_with_or_without_beta()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    +
    +template<bool kBetaIsZero_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::epilogue_with_or_without_beta (Coord< 3 > const & block,
    Accumulators & accumulators 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_load_fence()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_load_fence ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_store_fence ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ m

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::n
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    Params const& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::params
    +
    + +
    +
    + +

    ◆ shared_storage

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    SharedStorage& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_storage
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogue.png new file mode 100644 index 0000000000000000000000000000000000000000..25ed6dabbef7517cb614ec309ebce0e0f5d9e1a1 GIT binary patch literal 1600 zcmchX`%}_c7{@Ry}&ig#?oOhl%bLKPW#{SSi zsG*f1002<*K2$gWtdoF#zrHqDjYBp#fMT&fI3hr!(SUYdG0A$Cb{lkGwm={_RGkqC zw)&~zp;!PMT3fwc5pDng=|Q9RL}X~qGN%8maWX-fhZN3fI!|_BbRgve&z2we!tX?r zHdmcGsb2Rvz)%0_RTDpdz{uCed8cno1;RJ*Yuas~Hcjj3zFKDEn=4&~jmRs~*<~Y( zchAtOI9f=NSVG7y_g0;cI+2v${4E}<{aF|RDGQQ3pQiAOyCNyL%mUT>G;%fLfmt1j ze*))oFRc$ojH8t>?xt}wtPAHzS<0%43LQg;ZmTS8r zTe*=*w=Xiv=NHaFfvp6rcIs%Sq^M5nBeEhdL1=v3$BttUrMwmRY#4_g%^c>Re)-CV zn6x%Wi9F*iTxNpsPcR^fu5%!8ZSMaAwtbYGNvEZg0F8jV^bBAU;sqEHi&m$lF@8>8N74;3X8r1O@1%;zntrrQm zckc=vFqg(59)v*u&L7liNtwudttW6jRcifRD@%8>(G zjJH?nUraos)qNnCJiIwqtR3Gk0k%}jfTZg7ud9Krj;WtMRBJn+i|+yOfRVrye0d`Q z)qS8DOs!6Y{CY60bQ9T7_BSY(=P%m;*0-}72mVn=!#|5(Mj#gS_#(=>P}{j z=_cgN*Ds+;pe51NXaW`! z?nz8UC+3E@pogkB4jhjiwXn4EWAg3B=oN^g+trUs#iM~%>Z5mwEAZX7c2VwUl+RBD zVq@m!6nB`Vnn4MbMmqi)@z3P1t|Iz6>-a=;*OH2Y3GYsnPu(oah`kjhj-px9Wuxj0 zMr;LU$)sMVg@sd+89NLH&2PlBR3g_7Dc50o8s~f<<*f$+Ii&}N3n^iqELoRT3YL9y zUsk4*eS=T4Kbd5EcEBCNEuWup5;(;+mU!Nao7l>4ViGIPd9rii>sgQA4o^0%ci}Kf zkYdWJ{VLNqJ-+6yh^zcTyQcI9o zL@}9e7xQ@`V7RE+F1f*;RXEmZyb+9jNMRpD#uAPU46`owE-a6K>U?8N*I6Vb84EAK zB^@1tbVp&0BjyEmUr;;0*-JbzJFggZmNkFVvRG+}Eh>n6D(O>gYBJ~9R3Lt=g~k4H zHu);fUibeXW7}J*~?rG z7n~Ru2E}qzR9cE#l|?FL0~*OBQP9XyY2j-MMs&E@y%uJPMz--ldiGMg@gPm7L7L=Q zV9%`vY1#zRRND?B_wyQZRv>bX9|>{`f;*zUKkvB&(VlQDr+SM<%jJCGp|NC_aqznb M&;g;SN + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html new file mode 100644 index 0000000000..8d99223d79 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits.html @@ -0,0 +1,418 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    struct  SharedStorage
     The shared memory to swizzle the data in the epilogue. More...
     
    union  StreamSharedStorage
     The shared memory storage to exchange data. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef OutputTile_ OutputTile
     The output tile. More...
     
    typedef Accumulators_ Accumulators
     
    typedef GlobalLoadIteratorC_ GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef GlobalTransformerC_ GlobalTransformerC
     The transformer for C. More...
     
    typedef GlobalTransformerD_ GlobalTransformerD
     The transformer for D. More...
     
    typedef GlobalStoreIteratorD_ GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef SharedStoreIteratorD_ SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef SharedStoreTransformerD_ SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef SharedLoadIteratorD_ SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Iterations_ Iterations
     The number of iterations. More...
     
    typedef Delta_ Delta
     The iterations strides. More...
     
    typedef Functor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     The scalar. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Accumulators_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Accumulators
    +
    +

    The accumulators.

    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Delta_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Functor_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalLoadIteratorC_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalStoreIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalTransformerC_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalTransformerD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Iterations_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef Functor::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Scalar
    +
    +

    The scalar.

    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalLoadIteratorC::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef GlobalStoreIteratorD::Scalar cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedLoadIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedStoreIteratorD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    typedef SharedStoreTransformerD_ cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html new file mode 100644 index 0000000000..4f04d91f30 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Functor typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadTileTraits typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreTileTraits typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Iterations typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    OutputTile typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    Scalar typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadTileTraits typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTileTraits typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedef cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html new file mode 100644 index 0000000000..55d6652f41 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraitsHelper.html @@ -0,0 +1,403 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef EpilogueFunctor_::Scalar Scalar
     The scalar. More...
     
    typedef GemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > Iterations
     The number of iterations in the epilogue. More...
     
    typedef Shape< 0, GemmConfig_::kAccumulatorsPerLdsB *(GemmConfig_::Warps::kH *GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > Delta
     
    typedef EpilogueFunctor_ Functor
     The functor to do the math in the epilogue. More...
     
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > SharedStoreTileTraits
     The traits class to build the iterator to store to shared memory for D. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
     The iterator to store D to shared memory. More...
     
    typedef Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for D. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > GlobalLoadTileTraits
     The traits class to build the iterator to load data from global memory for C^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
     The iterator to load C. More...
     
    typedef Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
     The transformer for C. More...
     
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > GlobalStoreTileTraits
     The traits class to build the iterator to store data to global memory for D^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
     The iterator to store D. More...
     
    typedef Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
     The transformer for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Shape<0, GemmConfig_::kAccumulatorsPerLdsB*( GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ Functor

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef EpilogueFunctor_ cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Functor
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerLdgC> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerStgD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename GlobalLoadIteratorC::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename GlobalStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Shape<1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmConfig_::OutputTile cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef EpilogueFunctor_::Scalar cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * GemmConfig_::kScalarsPerStsD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    + + + + +
    typedef Copy<typename SharedStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html new file mode 100644 index 0000000000..b6a1ec7804 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params, including all inherited members.

    + + + + + + + + + +
    functor cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    initialize(GemmDesc_ const &desc) cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params inline
    iterator_c cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    iterator_d cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    shared_load_iterator_d cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    shared_store_iterator_d cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    stride_h cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    stride_w cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html new file mode 100644 index 0000000000..c94e55e07d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1Params.html @@ -0,0 +1,274 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index stride_h
     The strides for H and W in the different iterations of the epilogue. More...
     
    Index stride_w
     
    GlobalLoadIteratorC::Params iterator_c
     The params for the C iterator. More...
     
    GlobalStoreIteratorD::Params iterator_d
     The params for the D global iterator. More...
     
    SharedStoreIteratorD::Params shared_store_iterator_d
     The params for the D shared store iterator. More...
     
    SharedLoadIteratorD::Params shared_load_iterator_d
     The params for the D shared load iterator. More...
     
    Functor::Params functor
     The functor params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ functor

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Functor::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::functor
    +
    + +
    +
    + +

    ◆ iterator_c

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    GlobalLoadIteratorC::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::iterator_c
    +
    + +
    +
    + +

    ◆ iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    GlobalStoreIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::iterator_d
    +
    + +
    +
    + +

    ◆ shared_load_iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedLoadIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::shared_load_iterator_d
    +
    + +
    +
    + +

    ◆ shared_store_iterator_d

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedStoreIteratorD::Params cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::shared_store_iterator_d
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::stride_h
    +
    + +
    +
    + +

    ◆ stride_w

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::Params::stride_w
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html new file mode 100644 index 0000000000..4856ef616e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html new file mode 100644 index 0000000000..4cad48c739 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmEpilogueTraits_1_1SharedStorage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory to swizzle the data in the epilogue. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + +

    +Public Attributes

    StreamSharedStorage shared_stream
     
    +

    Member Data Documentation

    + +

    ◆ shared_stream

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    StreamSharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::SharedStorage::shared_stream
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html new file mode 100644 index 0000000000..baa0c5cf41 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream-members.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStream< Traits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    commit() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    convertcutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Convert typedefcutlass::gemm::GemmFragmentStream< Traits_ >
    fetchcutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Fragment typedefcutlass::gemm::GemmFragmentStream< Traits_ >
    FragmentStream()cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >inline
    FragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >inline
    GemmFragmentStream() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    GemmFragmentStream(Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0)) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    Index typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    initialize_predicates(Coord< 3 > const &bounds, Coord< 3 > const &block_offset) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    load() cutlass::gemm::GemmFragmentStream< Traits_ > inline
    load_iteratorcutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    LoadIterator typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    Pointer typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    predicatescutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    residue(Coord< 3 > const &bounds, Coord< 3 > const &block_offset) cutlass::gemm::GemmFragmentStream< Traits_ > inline
    Scalar typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    shared_store_fence() cutlass::gemm::GemmFragmentStream< Traits_ > inline static
    SharedStoreStorage typedefcutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    Storage typedefcutlass::gemm::GemmFragmentStream< Traits_ >
    store_iteratorcutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    StoreFragment typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    StoreIterator typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    Traits typedef cutlass::gemm::GemmFragmentStream< Traits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html new file mode 100644 index 0000000000..72f8e3149e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.html @@ -0,0 +1,652 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStream< Traits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ > Struct Template Reference
    +
    +
    + +

    GEMM Fragment Stream. +

    + +

    #include <gemm_fragment_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmFragmentStream< Traits_ >:
    +
    +
    + + +cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > > + +
    + + + + + +

    +Classes

    struct  Params
     Parameters object. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Traits_ Traits
     Traits. More...
     
    typedef Traits::FragmentStream Base
     Base class. More...
     
    typedef Traits::Scalar Scalar
     Scalar type. More...
     
    typedef Base::LoadIterator LoadIterator
     Defines the load iterator. More...
     
    typedef Base::StoreIterator StoreIterator
     Defines the store iterator. More...
     
    typedef Base::Convert Convert
     Converts between tiles. More...
     
    typedef Base::Fragment Fragment
     Loaded fragment type. More...
     
    typedef Base::StoreFragment StoreFragment
     Stored fragment type. More...
     
    typedef Base::Storage Storage
     Destination storage. More...
     
    typedef Traits::Index Index
     Index type. More...
     
    typedef Traits::Scalar const * Pointer
     The pointer. More...
     
    - Public Types inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    typedef Traits_ Traits
     Defines traits of WMMA GEMM tile stream. More...
     
    typedef TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index > LoadIterator
     Defines the load iterator. More...
     
    typedef TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > StoreIterator
     Defines the store iterator. More...
     
    typedef FragmentCopy< typename TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > ::Fragment, typename TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index > ::Fragment > Convert
     Converts between tiles. More...
     
    typedef int Index
     Index type. More...
     
    typedef LoadIterator::Fragment Fragment
     Loaded fragment type. More...
     
    typedef StoreIterator::Fragment StoreFragment
     Stored fragment type. More...
     
    typedef StoreIterator::Storage Storage
     Destination storage. More...
     
    typedef StoreIterator::Storage SharedStoreStorage
     The storage in shared memory. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmFragmentStream ()
     
    CUTLASS_DEVICE GemmFragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor - bounds and block offset are aligned to GEMM coordinates (K, N, M) More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void residue (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     TODO - Recomputes predicates and clears fetch registers. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates aligned to GEMM coordinates (K, N, M) More...
     
    - Public Member Functions inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    CUTLASS_DEVICE FragmentStream ()
     
    CUTLASS_DEVICE FragmentStream (Params const &params, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Constructor. More...
     
    CUTLASS_DEVICE void load ()
     Loads the fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commits the fragment. More...
     
    CUTLASS_DEVICE void initialize_predicates (Coord< 3 > const &bounds, Coord< 3 > const &block_offset)
     Recomputes predicates. More...
     
    + + + + + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    - Static Public Member Functions inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    static CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::FragmentStream< Traits_::TileTraits, TileLoadIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Traits_::Index >, TileStoreIterator< Traits_::TileTraits, Traits_::Scalar, Traits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Traits_::Index, Traits_::Scalar, IteratorFragment::kScalar, Traits_::DestinationSkew > >
    LoadIterator load_iterator
     Loads fragment from global memory. More...
     
    LoadIterator::PredicateVector predicates
     Predicate vector. More...
     
    StoreIterator store_iterator
     Stores fragment to shared memory. More...
     
    Fragment fetch
     Fragment fetched by load iterator. More...
     
    Convert convert
     Converts between load fragments and store fragments. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::FragmentStream cutlass::gemm::GemmFragmentStream< Traits_ >::Base
    +
    + +
    +
    + +

    ◆ Convert

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Convert cutlass::gemm::GemmFragmentStream< Traits_ >::Convert
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Fragment cutlass::gemm::GemmFragmentStream< Traits_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Index cutlass::gemm::GemmFragmentStream< Traits_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::LoadIterator cutlass::gemm::GemmFragmentStream< Traits_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Scalar const* cutlass::gemm::GemmFragmentStream< Traits_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits::Scalar cutlass::gemm::GemmFragmentStream< Traits_ >::Scalar
    +
    + +
    +
    + +

    ◆ Storage

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::Storage cutlass::gemm::GemmFragmentStream< Traits_ >::Storage
    +
    + +
    +
    + +

    ◆ StoreFragment

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::StoreFragment cutlass::gemm::GemmFragmentStream< Traits_ >::StoreFragment
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Base::StoreIterator cutlass::gemm::GemmFragmentStream< Traits_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename Traits_ >
    + + + + +
    typedef Traits_ cutlass::gemm::GemmFragmentStream< Traits_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmFragmentStream() [1/2]

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmFragmentStream< Traits_ >::GemmFragmentStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ GemmFragmentStream() [2/2]

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmFragmentStream< Traits_ >::GemmFragmentStream (Params const & params,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset = make_Coord(0, 0, 0) 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::initialize_predicates (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ load()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::load ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::residue (Coord< 3 > const & bounds,
    Coord< 3 > const & block_offset 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename Traits_ >
    + + + + + +
    + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmFragmentStream< Traits_ >::shared_store_fence ()
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream.png new file mode 100644 index 0000000000000000000000000000000000000000..c217a06c7717c9be55acd9613bf64847d0177f36 GIT binary patch literal 3703 zcmds4c~n!!8pjP3t30iUh)ADCX+=W?Bp_R<0tQi3Ktz+EBDk;wh#?_?#Jb>7z-Tx^ zktMZ?)UrgOBqSk3s*plTqE!r-gs=z^vXT%2WZ?yRYI{z7p7+jqf4$%L-Fs*5%zQK7 za=)2-?ZEz!W!7u0Ei5dSf%oh_WMN?)ZZ02Kyuf_VZ@qvsKh_)wiwv5XnK2ht+IJ6r zM&2}+=B_7Co*a3MK5VWoK7VL`goU}&EV?=(-7GAYTY`57Mxy7*MS}DJU+SXgcOP!) zowC!hBBJsCcMGUx`C$ePzA&_2kpX53>!ku_v+4W?D= z?DV4GUb*F_c4#DPOPhV3T$9yCUr z5quM1lf+X==a0iURQq_Hmw!DAT(_eW&@=7f0BDacbuirT2gLNa82Z)-4W=ZcXkjsc zN-5TiT$vIerkY5|a@@efVt|8=t5L_nB>~=+`dxx+BT=E*2TsA`?ZhM@k1FXS_@~#LbJx6;da=FmMB81#Htjub34Ms|5;B9nc~D5qHd{V z&Drr)o;zMkzdeD07UirphfHEMpT*N~dHIc8K6A+OZ z!L;l?Lj%sqFO{;Q@m=QQ2+2Is7qM9)}eQqGDqj$ z&)mks{K)cei?vqrwQk_p&89G;kuC}0cG8oI zBou8|68;R3r>z$ftqLYFoLrC0i1PqWqV7@Y(~&?t$gw7bC5LyZJUPA$(o#kU`gCa3 zK_J%H;XMsED$0mu%iio()Kw0h?UkOxKOp6)dE72z88jf&$x=7aCx#kxP{Fm)=J?8% zr;!bpY9WE=>uhO!{|5 zsYFSl^rT$NUKq8d;8qt>W9YWz(F?SY!!C_>_rP`P&%pSjGZY0TP!X1{RS9W&1R}GJ zBrQxTM@t`O(}qH9N`0c>++QP%W!(M^U-CyXS^aFAQY$@8FfD&a<)G|`yi-l*SWfy> z+~xpgU`bldN!`oha@{GETa{8H+Z|}+CmB$-bV!hX7313Dt;|$Yv%%ZpEf2NBV|?+^{Ido5p93^_J)A7 zQ;kKW-Q75z-li05gcc?^Zv!5MG_d?yc?t>rmrW;>dAQ=F!}o-H*SjiFGv6RY-ys(& z{MJsK)IBU#_^nzUH6w$_WwHnFhFN9DG2<7`pg~Ozy#$PK8*9nkX!fLZLw`^9F`!7E zk{pd^c7vpo@_F?7jFK;^d97PKitJOc7c%N)BYKbM7pQ_|g@JW2R?rwWGhWb_43KLZ z*Q)n}KxvGE`>a`?bgC52oRZgeY*pp3Ix;|dDjU8M->r^b@5QJxMB>@LbpLQhWqQwV zWmeoueP;e|Q3EUi|NBeHTaBGK<$c|drTmKV?9xKU*if84~6_Ms{vbG=_GE(I8_ zMSuuda$oV;sos_~?dms9PwL&9y^onwRs_B(r(i4jv<_ikk-MU|{#@S8Rvwop{aSU>&~QuDkgW%- zOWA2U@5plbN@?4K04g4^jbFI(rnGtwR}i5lD@iz}WHY7qvEiugviP#re{gMqiCJ`n zy^1E%T}QboHw2T1Jg*|{qomw`Dpuy2YAc+*!UQ0E`i9a>D|w4CcF}zKyt6kz$I^-w zBudlHOSwDrlXYHTAHUdhS-*+v?j;7p${z(qRJ~;0>2$k|qmXjZFhZ*Pbn6|(jdYCx zjbsUeJ!!*~wuv-+zGyGdOWtZ+PqsQk9^Dy(8<|xS_RP;(5e0lxjhk~2Ze4GY8r0I< 
zbKUOQq;z9{qMib*TYR;16pn{f4=$ZQE>xckSS@m$J`(hNpt9=+cx5TT=Rq+*{I0v^)B30ILaS z%=;2`?I1qWwz3#-`vc@+mJs>@x_$P4b=vSg-T5(=SgtbTl@@v7;(O#Tn0@yKnkl$}y^e0^YGOpX}tH3|d(l*cfJv{#+?orymSwqsg z@80A1pv9k4IQ+j(0ga8zJNQw4eBv(_?LcF9{bAm~6Uclj=ufX4=KlwcEo4n$ZS%&s ze5##LW6}?pcNk_r^ieT;qs2EL``v^q5XKkSKgDd#9bkiNH_v&P#o4svYqM4@JWTijc&(Gq%qO+{0UdRG;#Plc zEU+s&S$W&%_7NRr8$zxeWW_aT_?^G-w6%+PSK#y*f_mr5*|43on($fPU?>%wv}W>ra;FJbv$aMP+e=vt>Iz!V zM90Mx30C{qrciRsMiCEeW6pZ11)dsuKM;vDJ?;j0=Ftb z)eDj1l$!+R{OIgEPj~X@+#sGkHD>9YB0pm}DTh z>Qq^pg#HR2sVMy{nptBF*+ROi&KUH_No4NT?t5Cbyif#rGnXO6qu)l|os9#rrX%ll z(h{??kX%CN&Dlxd7+wOo;|w{;AtitC_th4U6}bau!!av&JFMtAtjBY + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + +
    DestinationSkew typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    FragmentStream typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    Index typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    kAccessSize cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kLayout cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kThreads cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    kUsage cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    Scalar typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    ScalarTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    ThreadBlockTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    TileTraits typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    VectorizedTile typedef cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html new file mode 100644 index 0000000000..861ce1d13e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStreamTraits.html @@ -0,0 +1,387 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ > Struct Template Reference
    +
    +
    + +

    Defines a FragmentStream by mapping GEMM dimensions onto contiguous and strided dimensions. +

    + +

    #include <gemm_fragment_stream.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Scalar_ Scalar
     Scalar data type. More...
     
    typedef ThreadBlockTile_ ThreadBlockTile
     Shape of the thread block tile (K, N, M) More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef ShapeDiv< DestinationSkew_, Shape< ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1 > >::Shape DestinationSkew
     Skew added to shared memory tile. More...
     
    typedef GemmMultiplicandTraits< ThreadBlockTile, kUsage, kLayout > MultiplicandTraits
     Traits of multiplicand. More...
     
    typedef MultiplicandTraits::Shape ScalarTile
     Scalar tile shape. More...
     
    typedef ReshapeTile< ScalarTile, kAccessSize >::Tile VectorizedTile
     Reshape for vectorized access. More...
     
    typedef TileTraitsDefault< VectorizedTile, kThreads > TileTraits
     Define structure of stripmined tile. More...
     
    typedef FragmentStream< TileTraits, TileLoadIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index >, TileStoreIterator< TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew > > FragmentStream
     Define the tile stream. More...
     
    + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kUsage = Usage
     Indicates identity of multiplicand. More...
     
    static MatrixLayout::Kind const kLayout = Layout
     Layout of the operand. More...
     
    static int const kThreads = Threads
     Number of threads. More...
     
    static int const kAccessSize = ScalarsPerInst
     Scalars per instruction. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ DestinationSkew

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ShapeDiv<DestinationSkew_, Shape<ScalarsPerInst, ScalarsPerInst, ScalarsPerInst, 1> >::Shape cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::DestinationSkew
    +
    + +
    +
    + +

    ◆ FragmentStream

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef FragmentStream< TileTraits, TileLoadIterator<TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kGlobal, Index>, TileStoreIterator<TileTraits, Scalar, MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kShared, Index, Scalar, IteratorFragment::kScalar, DestinationSkew> > cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::FragmentStream
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Index_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::Index
    +
    + +
    +
    + +

    ◆ MultiplicandTraits

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef GemmMultiplicandTraits<ThreadBlockTile, kUsage, kLayout> cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::MultiplicandTraits
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef Scalar_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ScalarTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef MultiplicandTraits::Shape cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::ScalarTile
    +
    + +
    +
    + +

    ◆ ThreadBlockTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ThreadBlockTile_ cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::ThreadBlockTile
    +
    + +
    +
    + +

    ◆ TileTraits

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef TileTraitsDefault<VectorizedTile, kThreads> cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::TileTraits
    +
    + +
    +
    + +

    ◆ VectorizedTile

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + +
    typedef ReshapeTile<ScalarTile, kAccessSize>::Tile cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::VectorizedTile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kAccessSize = ScalarsPerInst
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kLayout = Layout
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kThreads = Threads
    +
    +static
    +
    + +
    +
    + +

    ◆ kUsage

    + +
    +
    +
    +template<GemmOperand::Kind Usage, typename Scalar_ , MatrixLayout::Kind Layout, typename ThreadBlockTile_ , int Threads, int ScalarsPerInst, typename Index_ = int, typename DestinationSkew_ = Shape<0, 0, 0, 0>>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmFragmentStreamTraits< Usage, Scalar_, Layout, ThreadBlockTile_, Threads, ScalarsPerInst, Index_, DestinationSkew_ >::kUsage = Usage
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html new file mode 100644 index 0000000000..2d92636729 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmFragmentStream< Traits_ >::Params, including all inherited members.

    + + +
    initialize(GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)cutlass::gemm::GemmFragmentStream< Traits_ >::Paramsinline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html new file mode 100644 index 0000000000..3c3b6a4bc8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.html @@ -0,0 +1,161 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmFragmentStream< Traits_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmFragmentStream< Traits_ >::Params Struct Reference
    +
    +
    + +

    Parameters object. +

    + +

    #include <gemm_fragment_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmFragmentStream< Traits_ >::Params:
    +
    +
    + + + +
    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc, typename Traits::Scalar const *pointer, Index ldm)
     Initializes parameters. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Traits_ >
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmFragmentStream< Traits_ >::Params::initialize (GemmDesc_ const & desc,
    typename Traits::Scalar const * pointer,
    Index ldm 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmFragmentStream_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..ec9500caed385a26dc32c016ea021f92fc707f3a GIT binary patch literal 950 zcmeAS@N?(olHy`uVBq!ia0y~yVDtvE12~w0l{{Mg6 z{Ohu-k&*|F+BPf;%)L43*peS!E-jrMKO~$c^{CW##Ciqsm1%lf-dL*gG-3LrDTi5O z^;82V{gVnWsr)l>iT#nOv3Uck6Dp^(wL5+`D6*uD!MG-`d}|`)*Gvk=3h< zduP{uZ*|w6f|z$f{T8vmT$dEbOp?2FJjGRZ%DF<%ssqM)wQ*jTl3&TJfAuZ@zNMb$ ze`aJWn=#yQHWlndk%rfwnG+c7j_EN5{rI`jr-A2ZSZW60Sd71 zW;tyZ-z}{Wn)x&#~D*{It|nZ2HsW`|Ixhie_1}{nY*1g3|hz z=5y=Tra$fzd_6UJ-qUXjbL%UlH`#89$(uiW&B9o3)0(>}n>8Dz9e!%(`uWesefOSj z+xp4)r}}~|ul{X1zw9#GxxIzQx35fpeSFH#YrUZ_?bg)PulxJ!>7kV1+^F!6@yi|T zV|QOan7VjNjE497r*n4Zo!eG@KgGWP)bhFSw*I|ocE{MoasQ*bS(A$w{bRaX8TmZW z;ci-?<&Hc4Kkbh9=WkxkwIX`^zsrShFC>dNUj21??w91lZ&i|4N-mRZ;Wt~oNAu~r z+h;uczU!S{{=#YV-3W*4RnyZS{}P{f^yv(Pt*19nJ^y&q`8|%$3XB{K3@VR4+4E+N zbI96=$~liG3+pb(HlAK9z@IG{bI$MA%}UEcLwo)C<(8KEyS1WTeF=Q?P2)b#)zUc$ zm%MK7HC}i2qfvSB1(UC?X#^b;@f7qPyESet9$F$ubTSTCb^G!&yn5d8EqcT ze$V!U@BPj-*-{Q__iM}#oCw-;j!EQ!=e?fOJu4a0YChj(h=2T7hTWiyb3%VC)02eh o43;-k9r( + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Base typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    BaseParams typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    data() constcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    Delta typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Fragment typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    FragmentConstIterator typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentElement typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    FragmentShape typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    GemmGlobalIteratorAb(Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    inc_advance()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_d()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_h()cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    inc_stage()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    inc_w()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Index typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    initialize_predicates(const Coord< 3 > &bounds, const Coord< 3 > &block)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >::initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Iterations typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    kAccessSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >static
    kAdvancecutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >static
    kFragmentSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >static
    kIteratorFragmentcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >static
    kLayoutcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >static
    kMemorySpacecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >static
    kRequiresLoadFence enum valuecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    load(Fragment &fragment, PredicateIterator pred_it) constcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load(Fragment &fragment) constcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load_post_increment(Fragment &fragment, PredicateIterator pred_it)cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    load_post_increment(Fragment &fragment)cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    paramscutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Pointer typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    predicatescutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    PredicateVector typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    residue(Index k)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    Scalar typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    SharedStorage typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Skew typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    stagecutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Storage typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    This_ typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    thread_offsetcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    ThreadOffset typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Threads typedefcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >
    Tile typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    TileLoadIterator()cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >inline
    Traits typedefcutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    valid(int d, int h, int w, int c) constcutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html new file mode 100644 index 0000000000..4210572d79 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.html @@ -0,0 +1,912 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > > + +
    + + + + +

    +Classes

    struct  Params
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorAb< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileLoadIterator< TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef Base::Fragment Fragment
     Fragment type loaded by the iterator. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset. More...
     
    typedef cutlass::PredicateVector< ShapeCount< typename Base::Iterations >::kCount > PredicateVector
     
    typedef Base::Params BaseParams
     Iterator parameters type. More...
     
    - Public Types inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    enum  
     Do we require a fence? More...
     
    typedef TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > > Base
     Base class. More...
     
    typedef Base::Traits Traits
     concept TileTraits More...
     
    typedef Base::Scalar Scalar
     Scalar element. More...
     
    typedef Base::FragmentElement FragmentElement
     Fragment element. More...
     
    typedef Base::Index Index
     Index type. More...
     
    typedef Base::Skew Skew
     Skew quantity. More...
     
    typedef Base::Tile Tile
     Tile shape. More...
     
    typedef Base::Delta Delta
     Delta. More...
     
    typedef Base::Iterations Iterations
     Iterations. More...
     
    typedef Base::ThreadOffset ThreadOffset
     ThreadOffset functor. More...
     
    typedef Base::FragmentShape FragmentShape
     Fragment type. More...
     
    typedef Base::AccessType AccessType
     Memory access type. More...
     
    typedef Base::Fragment Fragment
     Fragment definition. More...
     
    typedef Base::FragmentIterator FragmentIterator
     Fragment iterator definition. More...
     
    typedef Base::FragmentConstIterator FragmentConstIterator
     Fragment const iterator definition. More...
     
    typedef Base::PredicateVector PredicateVector
     Default predicate mask type. More...
     
    typedef Base::Storage SharedStorage
     Storage object that may be loaded from. More...
     
    typedef Base::Params BaseParams
     IteratorBase parameters. More...
     
    typedef Scalar const * Pointer
     The pointer type. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/stored by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE void initialize_predicates (const Coord< 3 > &bounds, const Coord< 3 > &block)
     
    CUTLASS_DEVICE GemmGlobalIteratorAb (Params const &_params, const Coord< 3 > &bounds, const Coord< 3 > &block, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_DEVICE void residue (Index k)
     That's the residue! Update the predicates. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    - Public Member Functions inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    CUTLASS_HOST_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator ()
     Default constructor. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE TileLoadIterator (Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
     Constructs a tile load iterator. More...
     
    CUTLASS_HOST_DEVICE Scalar const * data () const
     Returns the current pointer. More...
     
    CUTLASS_HOST_DEVICE void inc_d ()
     Increment in the D dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_h ()
     Increment in the H dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_w ()
     Increment in the W dimension. More...
     
    CUTLASS_HOST_DEVICE void inc_advance ()
     Increment in the next dimension. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment, PredicateIterator pred_it)
     Loads a fragment and advances the iterator to the next tile. More...
     
    CUTLASS_HOST_DEVICE void load_post_increment (Fragment &fragment)
     Loads a fragment and advances the iterator to the next tile. More...
     
    CUTLASS_HOST_DEVICE void load (Fragment &fragment, PredicateIterator pred_it) const
     Loads a fragment without advancing the iterator. More...
     
    CUTLASS_HOST_DEVICE void load (Fragment &fragment) const
     Loads a fragment without advancing the iterator. More...
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    Params params
     The parameters. More...
     
    PredicateVector predicates
     The predicates. More...
     
    - Public Attributes inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    Params params
     Parameters structure. More...
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    int stage
     Stage argument enables wrapping after some number of tiles have been loaded. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    static IteratorAdvance::Kind const kAdvance = Base::kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    - Static Public Attributes inherited from cutlass::TileLoadIterator< TileTraits_, TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH :IteratorAdvance::kW, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies in which dimension post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies type of iterator fragment storage (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, Advance_, MemorySpace, Index_, TileTraits_::Scalar, IteratorFragment::kScalar, Shape< 0, 0, 0, 0 > >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<TileTraits_, typename TileTraits_::Scalar, TileTraits_::MultiplicandTraits::kKstrided ? IteratorAdvance::kH : IteratorAdvance::kW, MemorySpace::kGlobal, Index_> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ BaseParams

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Base::Params cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::BaseParams
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Base::Fragment cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Fragment
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ PredicateVector

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef cutlass::PredicateVector<ShapeCount<typename Base::Iterations>::kCount> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::PredicateVector
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<TileTraits_, Index_> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Threads
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmGlobalIteratorAb()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::GemmGlobalIteratorAb (Params const & _params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Scalar const* cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize_predicates()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::initialize_predicates (const Coord< 3 > & bounds,
    const Coord< 3 > & block 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::residue (Index k)
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAdvance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    IteratorAdvance::Kind const cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::kAdvance = Base::kAdvance
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    PredicateVector cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb.png new file mode 100644 index 0000000000000000000000000000000000000000..f6dfb5958383efeffe81aab5acd3787ea820e1cd GIT binary patch literal 3492 zcmdUxdo+~m9>>Rac4(JstAvnp31R0FC6tIU?$;WZB$I|1WM(kN=zUXFaza z?X9+NmE8&gfwqIKEu29hQJzr$Q(RPdEp<=&Re0OyXzOaZxw$Enp9K?-Un1`c)%X7V z`SWv?k?uk(9^q{70uly&YdtNly2APNz!s-nBSmKNI4I7EZ5H%|Ai<{DN)52R;Vq@O z#$?(buC~karvv{~@&T)>q&wTF7H8PnsBf15<5QGnz`A(-13=vmq5VGufof`^2o5~j zX0}3_#rsA0Xn+%`F{DdshZrFRGE+`|?Y8(LN#4`+GB%6?n{TG*1k92QkfFg3`B7}epPRf68K z`=oUzJZT})m?UNMhD(@?=6d5zN4?2>bYG!A*BHx)9O0i;05aK39-jL;_&`d?igpTm zmecw=_Hu4p8(G=SOK%qjdas~@1?6F^TDx4b@fjvgJPrjZ9kun$_G&w61>yod1tD3t z+-&$Q-)^O=Z`&92`AGJQHNpD|=!zC)9oHRQny;lZ2C+VOI3BvWa+!JMJIEM*CK?KreE&jBhd|(f>G!dyS*sh4eilr#Qq_y*_?+NtHuY6LU zMl!e~u;}?yxhpg7(|gEPuk#vJ!1AW=qO;<$2Yja1oe$#y(^o5|M?yH$qqX6heaV)gIz_=*P@P==n@l*tF#X$RpmPtkb9(e8yZ`E5q#sQaooj(Y+1=CZ2p35^HFd zsjAJuk(T7B3aeKA1Ia>S~7hv-Jib z8d>(KR+EWtnelIKgco4?bZ82zzI5jS?0RVe2CBn;G^S)&%ZN|0-pVHEp&KJ)m2913 zWRp@ZHliO2rbC*2I#%}-Gs%L)#3;nIOXr`$F`j3{B{=j3Y}BufW(4WNG@J(}doKDW zEsb>N2;f1_oGQQHM`)U;p7fmV@N_hP{HQ|1&`s>Y9@HDO*FG%yTGFk{M?IiviJpP$ zX|P$Z$xknuowXLI9{#aEH%I}*;rH{hZdHEC8TnX3QBXq}5-fWwSNAYjt>y)XqdK*5 z2&^@vx6Wh@I#^(@&eE#FUzMT3b+4Eug5loJ;bxFdHIl0Gq_SaTy{h2%F|O?v2+t#E4kJKG|epz42!smuB&gT>`J>MlRgAR|HwR(_am!ytT&PD+;Up3nJ=@$#*Jb zXE^Js7waDcZTWCvL^@97H_#SeYZ4M9>I+I3>(xn?e-7Fg=Tr%ZY>N}wmzCJ9h5Ghr zOEXmLTckfJ;+uW-hxPg~@NZ+kaIOCjK+2OH*j9L`?Sg}8nK6#ovoVP7oUcsHOIfVD z9XNHmmB1hup0{D`R(#Mh$~lf(;4LgGz&Gf+l{4JMW6(oZ9gr5{PMFuSB~x=<6knv> zHgc~B-!|LuP#UF~LH}vhn#XvVQIx}`4!sQydeK3Scx$1ZD7D;WfPiEQXzL@koR7ez zC<8@X_>ATnZr(b-cUbY=DvX&4lnJL(c;9umC?%4qKxNC(8ZT(DH-6hRNfChKC~+xX~P()0_C&^8R1R?y*AQb{95QjNA{u+tJ#BIdMr zUQ;sm_=`hh2xuD3^=c$ONllX%gP`Sgdik{VU+Do_z?CO?AC%R(E>gKq>uPTg^XK+z zP;*Xz=WgRDV4XF$)cHsVF8kyBsxhYMdW^dQ_7VqkJ&`pIiS@MXF?v|?VPI}I8a?)u 
z(IxHl>Nukdznd5unGIA?hQyxM3vL0)4|CD5OZrmm>k$L;Hu!Mu+{1|O@w5CnRFR;) z^J%303dA}{~yoB+=JGGe4 zd9&&8sdGF?PA?VR5>0fQ_EX%p%=Qa0Y0|Y53l2B=?3iC0pX!P@TItr^SW=N*-#|88 zJ5MIalv9Tm=syFcZ~9yd)}5yU()9Bw@Zw$A`_RJ=!x-x|!_~mDXp(6V*)TdHbj`0s zOC+&ao*dwuxjP^B*^&1#A+V^Sl+kjEY0Wbpv%SDaJ<_PDxBZke(Hv7qM^lI3Otm%g z`jd_cpT!0!PbW=3EA*XLuSbt12f7Jok{7fjON%qc#7E!_j4mH`Y=18gE+&L+RkRAw zyLwu}eLR&)T+VN?qGxzw8bS&%UR>(Nb-Mik)TAhT!e!%S5br7qmwgdDcN|l6UV^_1 zRaUzP=E*agq8AE$b{KcAzIbAggBv0_z_DgzN0H3Vn}FiYpnyCAyS$fV8k~2HInB^8|%^9@!yXV?*RnzB7 zaw_WRt%J#rDL1Yx3G}RFXNf0Y9$dZ4y_^vrfg3=;i{3>u3L4<_k@M{#(iX#`KCa~A zx!8#Z?pD^BfC_wPU43Q|XXOT+A7YJfjh_zqCB&p2^6G5oDnB`*R!(J7%3TT{fk#AE z0V90ZGxq5L$!1DVJQrCYYR|)w8xu^d?u8ijQNFmd|6IlAkz`u>%IQ|%( z)2eTmp8XiE+of-J=H)+rRAA?yy~u}>$WKeDi}q=Xai**kth*^LwJ^9)AghGmr8>UsMr@)1t1dQkElrNZnq1S z!L0S;o~8f3Oujgtr?=6YO-J_|!RM-N?-%x)M@a^YhDG#7ca*aruwX8a-_}09^`Y~kp>A+Vt#A~YszfDP$T)Fi1J;T)1D~OvzM}7+U{^O9BbIlnl80h( z;+@ch-{~>>i-LrK8$hpRWg>s5dggK)v*E!pz3pVteR1`|543S$(|Xk#2Ey$XE__Gj zvQylyDE*mXGc|N=`!3;Es(H* ME$uDJ&3$kD3!?3^umAu6 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html new file mode 100644 index 0000000000..71243aedf0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params, including all inherited members.

    + + + + + + + + + + + + + + + + +
    inc_advancecutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    inc_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    initialize(Scalar const *ptr, Index stride_h)cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(SharedStorage const &storage)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileLoadIterator::Params::initialize()cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    cutlass::TileIteratorBase::Params::initialize(Index _stride_d, Index _stride_h, Index _stride_w)cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Paramsinline
    pointercutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_dcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_hcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    stride_wcutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html new file mode 100644 index 0000000000..d4517b31e9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.html @@ -0,0 +1,193 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params:
    +
    +
    + + +cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params +cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params + +
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_h)
     Initializes params to load a strip-mined tile, given pointer and stride_h. More...
     
    - Public Member Functions inherited from cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (SharedStorage const &storage)
     Initialize params to access storage object. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
     Initializes params to access a raw pointer. More...
     
    CUTLASS_HOST_DEVICE int initialize (Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
     Initializes params. More...
     
    CUTLASS_HOST_DEVICE int initialize (Index _stride_d, Index _stride_h, Index _stride_w)
     
    CUTLASS_HOST_DEVICE int initialize ()
     
    + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Scalar const * pointer
     Pointer to memory. More...
     
    - Public Attributes inherited from cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
    Index stride_d
     
    Index stride_h
     
    Index stride_w
     
    Index inc_d
     
    Index inc_h
     
    Index inc_w
     
    Index inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::Params::initialize (Scalar const * ptr,
    Index stride_h 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorAb_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..0a3e71c3ee24ee5edd4126f66f62994cebeb4425 GIT binary patch literal 3308 zcmd7VcTiK=8UXOH*w9dIp-4mmDAiD;EyaQoX$Gm%BS;Sl(o0m7s+52Msmr4mK@f<5 zp$HfeVO?q<^e8oiAVLBm@Gd@Q-@ciB|Gs&dNzTcc@631S-1Gg;y|FipuW=t1It&7V zxDonSOhF({Fwl=V#0mU{yJZOh4djNQncnX1F3_c~&lkP(PXc;BzOAjTR@Ffkz~4jn zO^t7YfS|q4;72n>5C|NKxN^}fh+{c>x}H*h_=%2BmCDNQ2SyeNr+X1K}Q}z447eHX=k_#3PaDCG= zh<+u$_HS0$W7S@m?aJxo*`)w8tg$-n4b|LjV^fdDfxVwX8R+aE7KOeD3y_UTgJsi& zd)c4+{o0~zxLTIdVd??GswwwZ7SlTThFZUpY~{1hOJa}lXA+XW)O^*0dlBWMwznuN z@fWdZC&i7bIH2Ws2*{I`=(;B}aTRPHth2wSj?E_LxTpCnjotHIg0p4`DoaKm#{sFnflFpt{|dKQXwMPh9%z+XxH&|MG9u;hi=DB_Mlfp6RCwyOh@ zQ%y<}`8|ut&bf)J_6YR&b*3E=*5D%I|1d~Vt`X(eX+}eLIs!?hm{#P-dQ$=dn_nUS$O zaD7{Dg;^;TFbqtXWxm3w5y!M0@^1@2guJs1tz(?ruL>Sigdhu9`-ulE!@&#A3XhY*8c27Ti>-ID&$cf<*Dq*9!9Sz080LL=Ar=UKtk| zjLt52FOEQ42YNPDM|(OO+LC4Rb@k&#>Mn0x5}e?BDxJxT2(VsbAGyUF+4VUDx?ha1 zz3%a(zads*W8@E(beEhI=q3*#O+Kp_b$+?x6i<(H%s+&Uj4(ABFC|3c@_OvEGJfkm z@KPq#MlF=pbsE8K0d6~_q`)JJiD>U?MuN5;xF_38GoiuIv+5b(lL8ULa0Gg=wIQ#O zE#4)sDY#XhUtQzlhf*^e<*hP~d%Ksz>Tet6?1p?A9Acg&4E5d9v*LVTp@pOgc9cB5e$#pO6`l$E zBza}yH^LIBzoWHb!n~yCn#_US6VO-Unc-y#P=?o2f@gpvE(hB%lnP_TVUI5@_3e2LuCgwSFpq zfCL_lY_3TS0st8r$8ntINY94K6$k;t4G8#N(7F8&_BjG*0O5ccz&Jn#Fy5bn0_lT* zyn7Q(eoV|c#;N7*gsp)0^kiGE%+yCDSx{%4&g+~U2udi^#WRQ zVPa4rR;=QoYP{32qlmbm?a%BGu`ZYHcoZqY4jfD^gzX;#?!x#x=dTXOf%Z%@^^*B@ zEXCsOGf3IDh{MjFEEZm?Uw(2(y=ugFR_!~fZatSxo=Bm#*jhKH4~!2!g{cf=)HBG} zetM@q_NxTVYF!8LE;nQi-EhLexv{~C*Vf+$6OrX8?s0TLe7ko~yf2iy)bMecu9yG1 zT(sadyVE&_@dD+&27!Sx2U1@4eI&P}i~4j-t&Nf0drmn#s`aJ`8DlTv6 zVd>xp13w13Ew*jYlzpRIHVd2w$v706IOF!dzj#v1Yad$oSr*nAM;J?^jI`&(h=o{d zc^{x3RfzTwd|J}Sj+*Ax7z|ka2%v3ev}o{G??T43zV&76oNPyz8P;2YC2}g?Qel3= z-L}-3A=pSv+lpv++pqWuAN2DtIRDc6%-+olmCMh)PhWx$@CfRidm{!hzW~KjYi23* 
zCbRXFymjrvmrh5yBEoy%h{dxKeOrHv9r$rdo;&fR)a6Wqn##pA?pu~;@pm^iQZ8bJ zDj<*eEgbIbGDf+jSXVjbIg9krPbio#;lF)xQ_PZDmjI3&bI!S_#f?N*6V-XXD#62d za<=1cNiEbq`+zX~@rq!&=F!V^?1x<`)O^O|adzT&gWKSblZy}8}zLtZn$pigI-A{7Y>2N@OZlt(cV1d4@Q0hN~3W%sp@&E{%mE0o?R&eb9i?+XNyX}v2vBtVs~&aaFd-0t$wz%v?0N4X5TAH@Ue6lU)~pF zjNxjV*KWb1sWW(%nQ)1!<(Tzgxyz8mjDz&D9Dv{foL)Zw9x3IU1cUUB^ z2nD%ehCK@t8D3{XHez|RqIxoCK2mOOI<{EA_Yp{CwE00|OMJHPaou(rg2i<0w2xTX zR{diuV`Nnhb++(eytOPCx5-9I&@f?_b;xs)$U0$L!m!zz&E}~0J-s^}H_WV5_3#`4 z>cb%lwwCVEqpcXI+yj%j%#JB#qz*|tq%2FHGxC;_|H$)}md10T#i9&<4w08cPI&DJ zN?3qs)UcX5m9uhN?)z?NDt#G6Qfe>>nsmkA^J#QB8?_eh&Fil+n`#6;!=Au@(7P(+EHDk-hJ}teXYf-J*bkG6@olPvhRu}VN_4hWCF0c{w wLD; + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorAb< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html new file mode 100644 index 0000000000..e77b99eb67 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd-members.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Base typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    data()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    data() constcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Delta typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Fragment typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentConstIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentElement typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentShape typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    GemmGlobalIteratorCd()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    GemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    ImmediateOffsetStrides typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    inc_advance()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_c()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_d()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_h()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    inc_w()cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    Index typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >inlinestatic
    Iterations typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    kAccessSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kAdvancecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kFragmentSizecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kIteratorFragmentcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    kLayoutcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >static
    kMemorySpacecutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >static
    paramscutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Pointer typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    predicatescutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    PredicateVector typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Scalar typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Skew typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Storage typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    This_ typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    thread_offsetcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    ThreadOffset typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Threads typedefcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >
    Tile typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Traits typedefcutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    valid(int d, int h, int w, int c) constcutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html new file mode 100644 index 0000000000..6af473203d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.html @@ -0,0 +1,783 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > + +
    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorCd< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileIteratorBase< TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Pointer Pointer
     The pointer. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSizeStorage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSizeFragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessTypeFragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessTypeFragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GemmGlobalIteratorCd ()
     Ctor. More...
     
    CUTLASS_DEVICE GemmGlobalIteratorCd (Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int offset=0, int pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_c ()
     Increment the pointer in the C dimension. More...
     
    CUTLASS_DEVICE void inc_w ()
     Increment the pointer in the W dimension. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Test the validity of the iterator. More...
     
    CUTLASS_HOST_DEVICE Pointer data ()
     Returns the raw pointer. More...
     
    CUTLASS_HOST_DEVICE Pointer const data () const
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + + +

    +Public Attributes

    Params params
     
    Coord< 4 > thread_offset
     Offset of an individual lane from the start of the tile. More...
     
    cutlass::PredicateVector< Base::Iterations::kW > predicates
     The predicates for the row. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileIteratorBase<TileTraits_, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorCd<TileTraits_, Index_> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Threads
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GemmGlobalIteratorCd() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::GemmGlobalIteratorCd ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ GemmGlobalIteratorCd() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::GemmGlobalIteratorCd (Params const & params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    int offset = 0,
    int pred_offset = 0,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer const cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_c()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_c ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    cutlass::PredicateVector<Base::Iterations::kW> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd.png new file mode 100644 index 0000000000000000000000000000000000000000..13e8ac2aa98ea568784f961bfbb610a30a1d8515 GIT binary patch literal 1734 zcmcgteKgx=6#gYNQB6}tRf=jmMn@}RR2wCV5bZQYgd)pUG9Q)In#@P)qkh{_iV#XE zv2{L5tL2!`d=Q$7Zf#|Xs?S(enN}GQqGajz@AmKBbMAYed(U&vz305oJ#QA#&wIP> z9$f$cwiA3j{Q&?9YA{hpOLLRHXB%pqG4WKOms+jXzy-xjO&2p?gZ?(LSRCAx7^Dex z82)|%fQIzp4Ez#^0RR|@;OQQi2zgsELSGUhJxjX9%1yTLnFQ_+mu>_EMz3gdtFBpc zZWi~17DtBMi3~A{4JnSy2)1~{u{g;|4W|(~wuCZE2WvvH#S;#yNRy;JLilnl3C*8U z8%NCvf7H3SCJkO7II4m_A=Lh{Hq?C>ui|IXZ+Rr_Hps^xj<{K7($SoIm3Q}*+i;4l z@s_b|uR!}Bs_7@^?Ua|%{MSEVTyGgYlEy;?PsSQ79Fw1T-rSFCGey>kP^*A~(neSl)wCiBeHeVI^O!&rnKN$mAI=yXHJ{Zkc`k%8y9pvpR6Mem;Dp-Z;!j zF`v`v7r%q>Zc5GXDC|z5z(=C3nCO;8_eeF~u3bX)Sf88{7I3u_T6ViR*t^`dLQs5g zJ85xQ<7_Dpfh!3gQ zjyQQ}0-|&W#A&mS#yWYM2<5G3Bf-gwXlsJ`%}KiDtW+Twr^JVq#1T{|QN)`&^*WVQk|dfL=C(Hg;B*i3bv zWrwvtov!AyhTgVOcC;s#;1fb^C<7@ySDj$+@$}Uy*|2qABrK6E!h%~Lp4%6c8$e~v zY|nF+%ZMf6NT~4U1%x9Q&i7-TkLmtusVhe$M1+&;v5Zfcj@w>~65D=*fYO?b zA3Jbzmc!~m=pe$CBXp9y^l7EEebX$;#3ZgL9wcREqNA5q6bDR`W-8|dF^srdtPA2r zZCI-p$&Mm2)2`hlhL??<%5+ax4um@BESROpf&jeg=eSe^$iix^`lJl>H)Ly-$KKe! zFs&*-w(6~qq(Y=iXGF%D#}D`q*;Si>)kR-k^Uo&v9_+ZMhKR|Rvt@*6nɤwr}k JcTZ3={sN_NFcbg) literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html new file mode 100644 index 0000000000..ddee2bf204 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Delta typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Iterations typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kIsContiguous cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kStrideH cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ThreadsStrides typedef cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html new file mode 100644 index 0000000000..755f67e6fc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.html @@ -0,0 +1,280 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
     Override the strides in each dimension between different loads/stores. More...
     
    typedef Base::Iterations Iterations
     
    typedef Base::Threads Threads
     
    typedef Base::ThreadsStrides ThreadsStrides
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsStrides
     The threads strides. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kStrideH = kStrideH_
     The stride in the H dimension. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static bool const kIsContiguous
     Is it A^N or B^T? More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Iterations cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ThreadsStrides cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadsStrides
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kStrideH

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::kStrideH = kStrideH_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..5f8b970e0ce42979e99ea9b7a1ca8e32ae374aa0 GIT binary patch literal 2231 zcmd5;dpr~BA0BFoR_+vy*hrnEG)IY%Tt>MRHn~i{isGjUW#xKK3P-strCQ4=n@emG z&21ZH?2t_lVl(hP6iq+IP?!H<1?7e~>FkeHS|WUkTiLg5bJ6q@i7YRf!vt>4n5Lrtv@LH#Xa}3@ z(OWpkp~h6iaHxaS7|w^B)3`IZLL5;asdW%xi8$wipg~BGDQp%3ffh^Nu@mjk(s@on6g|DI&kMr)$CpTL1DT@O78E$%B0`66oVNxsvVgosmH zO92Nb7~#gPZE;8Xlh;+>=-5CIXH)hI>!IEwM}L}SW=0xR?@DR{XFwbR(@fev6HHb9 zd@ZubS{{F<0dFWGdI%ZDy*arqI79t2>-l1LM(Y(AI2{ATcjR2cCtl zc_tTk%)1w`t(P&u@#qap!dPP{_4;W$+jf~=g9#0(-u>1D$eYlUGIk7_PGzcJ(u;qx zGY)K#QUwZqzj(JQrNuw}aRpevN@Z$#lfPlhw_a9NcwN)vk)B*m$4~oewcR)JeQy&m z<7d}Y=C$|6&{U=geC;X8F>Lqh{n=|8eu2$HEXI*5Wxz(kmPMiXg#BiCoQ)QIWBd48 za9mDa|EVgtCyh8@$t@w;ZaCBlv0vAXK#tJ2eg$?Gj2qcoN+f*K`)yV?BOF^ITFWkSBvVyTW`aVRqaFYdaliez3^SoF zv?%}bS7+Lem&0!i5J=E;xhS%X`E#a}u=v5b;Pd=>{8&$RJ-F%Rz~;e3lGOv`?JePW zkw5wawDEktA>M6yK?!idxOf)PE2fPDI9lh}R{nb(TxzF@p%))TD z+F`PWhRCBaP@ClC0pA?KU@vM=S&}8Dbg-P8IB9#bOmGHmh@b^;d&b_i+T*gG?COaF zvD5W_5c|@lsXf9xawHR7CG-PN+uP{11?6|dmEXj5TSH%K$+_yH#5Jr4Jm~#>{*b*P zE2-j?8v*_{8ho$+(~{KHXG4=j))@Nwi23_$*w!hh8j4-jRnY_wb-~egB_+TonJ=XN ze&0f)v4YM}kAQH2nEH!{r$ziKd!yzl_HI^pM@S&QM5x_u`y}o~qWp8D!`P!hgxXM6 z5ixK6q90;CXPy0sVJ%6DAFGyGVvy!ilnk=)%F%s;HiH-_9x9@)M*m`bj0OoEA{#F0 z2H{LYYYK=cH2wghCignDQP{aFjr}uZ!Dlg3v>GxAy>cl9hedcI6n2&6|BVKAc}_ts zp@`_c*+s-c2>AshOyTvqyWA`uHO5O303<~KgoOeu(=GV$bR8$P$(e^t6(Zo#$22+g ztAf@o{HHYhl6~AsEnEpIRFDCMG)2#_uhx!yWt0ozhoA@xg`mia{|DiV_C8Xfza@$2 zX*o0Fs^poNN*WIeo=*dVS9Fv~vh^kIDqOSAgWaTu3-_=a4C{rI46`%Uoo z8d*^3Jy7e(m4)-a<(3hGUVEk_eA}DZ1`umX1Ew%u&nWSz4fQELm>p;TgLIn3UG7g2Rd9FU-Y z(D!QIqRPtPt=i0yh&mF${+s~;GpF0y`Nl3fg0*Ue#j_;|>E41So*B<$@q z`i93ROW5$JegN-9qz3x?um1tw=~M)i#UexeDj{NSdl}iVfpWPaQEAWI^C_7bAuqxfu(*qM7No3ccS_5?}-_&o)r~- z0;)FOQd{oZBctD03U0DUz|esv_(~DD>*C<|v(2;b40dz5=*QDAo#o}GvU>Az>ZI@G 
z%Auu%r0R%a_T3R!o9tzStGi+RA#&cMKf-U2mDZSI(@z77ovSM`NWMz>bAAbTE*_v7 zt9UX1Y( JIT8Nr{{VFGQM>>E literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..9e253bc1cd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..171e163453 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html new file mode 100644 index 0000000000..aa6c60c858 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html new file mode 100644 index 0000000000..9b30fd7844 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1Params.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_tile.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld, Index bound, Index epilogue_stride_w, Index epilogue_delta_w)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Pointer pointer
     The pointer. More...
     
    Index stride_h
     The stride in the H dimension to setup the thread in the block. More...
     
    Index inc_advance
     The strides to increment the pointer. More...
     
    Index inc_h
     
    Index predicate_inc_advance
     The strides to increment the predicate offset. More...
     
    Index predicate_inc_h
     
    Index predicate_offset
     The column offset to compute the predicate for the columns. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::initialize (Pointer pointer,
    Index ld,
    Index bound,
    Index epilogue_stride_w,
    Index epilogue_delta_w 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Pointer cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::pointer
    +
    + +
    +
    + +

    ◆ predicate_inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_advance
    +
    + +
    +
    + +

    ◆ predicate_inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_h
    +
    + +
    +
    + +

    ◆ predicate_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_offset
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::Params::stride_h
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html new file mode 100644 index 0000000000..c390004d4c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalIteratorCd_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html new file mode 100644 index 0000000000..92fd6a4c08 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Delta typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Iterations typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kStrideH cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    ThreadsDelta typedef cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html new file mode 100644 index 0000000000..3aed66b5bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
     Override the strides in each dimension between different loads/stores. More...
     
    typedef Base::Iterations Iterations
     
    typedef Base::Threads Threads
     
    typedef Base::ThreadsDelta ThreadsDelta
     
    typedef Base::ImmediateOffsetStrides ImmediateOffsetStrides
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kStrideH = kStrideH_
     The stride in the H dimension. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ImmediateOffsetStrides cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Iterations cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + +
    typedef Base::ThreadsDelta cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kStrideH

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::kStrideH = kStrideH_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..e3cb008b59f336be86e7bafcc5ef41f3201a280a GIT binary patch literal 2215 zcmc&$X;>228b-|}i*RY$3^jAfbTYM^QAf<{iu+YFQ^Tz+FX6r`;!-JD;w4J0QM58M zOC?mwl}XAa%)o^dQv!1`1Y!~)P#mWF=l+@d_nzlD=e*zdzCX@+&hx&{K?nMK0yXz( z0ssIY)XOah08kbxaG-{YVr{aYB`CH%fxaQ`o12>o_(k;T!HbMa1^Pb8WO5iGJ5*6> zqzCy20~Dga#?)wtjiO}-b#n>HR^rw2Qn`n>yYFV&iZ=|tb)kVe8beL?W{47pm16`| zY4id4Qv~zYY!|`@!R+emduMEJ0yT%`rkMz_IPCr7IEqh@H+DKu$4b!+cZK#EHyS4Z z{mQIJ2cQAX`Rh=6WRE-XkF}5SUlQUDI@Fyhm!xW8SHph1IYp@9O8=e*WsFv06Q+{(Wgd921DQwPy!~^e%RGShqBKHv=`kGcq8tT;? zQ7HlMLG=Ok9l{;gvRVVm1TvQOMAR`%N!g>>Cy;rc8{A1l2EygN{=$VOmS>^|yL6#Q z1~DN?_KZ=a0qCj_hnM!={~E-*>S4^<+GwocAiW>ZX>9I~+A-;&lzXj ze|J$+#}85_cA13jI=I{&vwupyf|D$@I|m~j4`IPh7j1j&h10H#knTchM03mHl%evw z2U(8shp$(826%1Qv#^|om?DLXXWmWfK zDH!J+{L_9BueUGs*i1ff?6lq0`5VNjpDRqM{ow*##iqOLvQwOTgwt>uQ@*C{gE=X za&f<)(PnRke6ls?s7EPje7BR(6W8S$@;XAhq%O~-Oyy-ayS;m)$C>adzn3aL=_KS; zkbsD}qG7*2Zcnra8}Y@?BzSLC8x7o2?-BeJ9}G)&F;bD1YP@tvy;LY|^%O^feJZ?>sDZQgbt`$_Mf1TjO0=_srAf5Xa z6MC%BU)&hcG+@$h@MXUz^pI%Uovg|(ll zZxCS-j7xC|L=5-JUN)y594{jsTClHl*$#l+Y>KO)d7i3{AtI7vnt!J-Z1(usP2E53 zT=E!Jumu*-YjHB~=&c;sh|=oTOI@of)LL#D8;KD(#3ODG`p<4_8P~6$$Y2-I6UZQP zi^qk;S&dnlL1C5ike$H*u4nOXb&(y6x(+hzhh?9UcF6;;#8mx4B!V9t?DrRT)C}3O zpAhp<`?Ki#D>6;RXO+~hTLZGMQy9{kG=sRsay_jiUFQSyX8#1`_hQy0b$H`KRGYx8 zf2uVJggkE-y%jMGwe+Gz|6?f(BoYTIWu<{x#YM-dFd4 zb?uKlox~3gJ3mwP%w2&@>Kn|~#vX~OHT5F3rUMdAE1nVcE|jzMiY*$ + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..524a06aa49 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kStrideH_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalTileCdTraits< Scalar_, Tile_, Threads_, kStrideH_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html new file mode 100644 index 0000000000..6225081e82 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits-members.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSizecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kLayoutcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kMemorySpacecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kOperandcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    MultiplicandTraits typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html new file mode 100644 index 0000000000..4e61285ebf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.html @@ -0,0 +1,400 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > +cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSizeDelta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSizeImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSizeIterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayoutMultiplicandTraits
     
    + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<0, Threads::kH, Threads::kW * kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Threads::kW * ThreadsDelta::kW, kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kAccessSize> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ MultiplicandTraits

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef GemmMultiplicandTraits<Tile, kOperand, kLayout> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::MultiplicandTraits
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef ReshapeThreads<Tile, Threads_>::Threads cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef Shape<1, 1, Tile::kC> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + +
    typedef ReshapeTile<Tile_, kAccessSize_>::Tile cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kAccessSize = kAccessSize_
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kLayout = kLayout_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kMemorySpace = MemorySpace::kGlobal
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::kOperand = kOperand_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..4c9bada4639d8a2173d684662ed65cdcc38edc1f GIT binary patch literal 2856 zcmc&$c{E#T8^0!>P(){1HL9j&deDj5s-dVQdI%x0WE!Mt>?3wfi`p78oifuq}3ur(bh1Q5|T;vP$5l&(o$RU-RR7mIp1IN-}jz#@AEwO{O2eLxJ zcS;R8xUy_@%MLNT_9{Q9?KKo9suhhQo3uXs_yA~cp~*HRyd^~JpoX?3ozix@tqran z=jF0Qa%!iRY)j`I8TB8CBqC#{bSnmbr)tOSEK219i?+n(h5b!VifwW&Rg}@dFY;BL zql{bc7>3B*2nBb>jBldvA)dUXQ0pTikeiYxlC@QK=I1IJfIEMN%oScZz1Gy1sy9hY zNVQ`Noh}q*P=p3cv+1!v-wgf+i+)95<#cE`TWp4vpF`EcoEs0XQw!2Avo>Ap8YD-OqW3PQTuhnO17pyRC{q}x> z{@y$Sc8873k)~I!2hW!!J`N>0Da14pG;5}2X*45#c1OKI+VCb!?Y4f>#qjsPFfF3P zxM?9?JL=&OFn^HM>om!2-ZuW>=svTqvhr}s6DLaYzzx`XDXDuDNZ-!L(9Xs}l})tIGI z%)dfgG}qKJje9oBf;G>{)iF;%NObwY`r>&_o@B_{PK^c?O*BLf1<^n^7z6Ob-dA>ug+E@At$bLoG39D%Pp;m6orYK0FZ5 zcB%kP0UtT{Hr>g7CY`#c-QTsUUT*JRvpzx1Eot4Y+zsUd$@L9B)M_x**fQA9?@MN> zn=x$rrx!rf1Vyg2`y;RJ!JDHrxer~dPalZ^z<72J0_HNP0OY8p*%a(Wwof%!Fa`}6 zSp((4!PZqiwx12CWPS^#rLX(hfBpylvE%hlV8;-j0#G)fk=7)D`U5Iya^MJfd0fWR zu617!-+JW}ja`ea`;W){c4DwZ!50C@`KvH+O6+3Vn=k(fMk@RbUSP}4ukSC5m!QzQSqFKBsjLf%& zVub9xbVo20s)q}nwNY6;KIY>{lPbEkXq5%uiW1udh2loGkYggprFz~9BG$Op} zHqK=OP!9twE&9D&oUObgBU9%fS~ESgoDr<^)Oplo5~&M{(2t_|-&@5R3$ZbbchFTu z(k#X&aJRHZM==<=wLt!4oe}7R?Yw`1K=7VQ!Ly1}{DXr+l0d+nu=H zi8&mZr&Ds_+j4ESIvtyrksGIKSSa1PUsL@mu4pXS2&(pQO6O-XH`^I$I^za=%Us{~ z%tX3VExqca(RFdps7Qt?KR;1|pkO#&`iulI6TLcW^DS%(icQ zD?E$JLS2H8td{(;q?pkc-CM~~LZ`_eI+sbSS%e(gL|s)JmwZhwAm^^XyTJ7*rL5D8 ziubAY!iB%Lz%#N61Lw~l#2+k+s3Cx1R}9Bp^wbRCOvhZ|?Gk-><-x0-*@yHkH5aCT zxJNAQzib<8iyqe8!DWR&u=^ix8SqW!Pv4P0AthxfKnAaP>z%X7UY`iaOW7*E*4mlG zDV-VBJl47&tK!k9-|HsZs=s*L}yfanL7BX zO?fj-J{6WlZ9y5U#IAM4k@niMwq|vD)iW|#)98dTIC@#bhC|?l|8%t8#P7*z#=gdO-Ku!Cd8Y8E>PI?l78m#NHlpcAEgeqP z&k<^9ora^)iMA2tBc2T-P0tsCAXr|Rb$h3;9-O%!Vxt^$C{)v5lAn?C^^gTQZUBE^ zZZ=_(zV&E+okTp>c%UhUU0_5>|3niyn*7jplqL{Up9tG4liYv7VvBA@C!k#-Bc{pa 
zOLUMs_d}sxx6Jg>2c#5`Jd*Kuv+*a-aHy#v<=H*NW3h^%hl|9gRXK~dEb4h3q&cQT zhFTE;KQOL7Y458*xig>xXNG(GsgdW!FZ2b7#!cKQw5h}46eKP*K*)t|Cf)(%tvO@QyE|(zR42 zzY_facty>cPxfW#b7QtPU9L6OjJZNmYgsyNg#KH(+#)Eqv7PnGvXa?2l3F$LIqcmf zYxL_mS*SdBuN9m6Bj_>#79#fM6IZl6c@wodJNtbR9%xf=5((WCII=;+xLIlUD^wqi zVxW3w4 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..7b47addb66 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html new file mode 100644 index 0000000000..db9bc1bc7c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html new file mode 100644 index 0000000000..121fe8cce1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmMultiplicandTraits.html @@ -0,0 +1,228 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + + + + + +

    +Public Types

    typedef ThreadBlockTile_ ThreadBlockTile
     Shape of GEMM thread block tile (K, N, M) More...
     
    typedef platform::conditional< kKstrided, Shape< 1, ThreadBlockTile::kD, GetExtent< Usage, ThreadBlockTile >::kExtent >, Shape< 1, GetExtent< Usage, ThreadBlockTile >::kExtent, ThreadBlockTile::kD > >::type Shape
     Map the ThreadBlockShape onto (kH, kW) dimensions for A and B operand. More...
     
    + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kUsage = Usage
     Identifies multiplicand. More...
     
    static MatrixLayout::Kind const kLayout = Layout
     Layout of tile. More...
     
    static bool const kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor)
     
    +

    Detailed Description

    +

    template<typename ThreadBlockTile_, GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    +struct cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >

    + +

    Determines the shape of a multiplicand tile in terms of strided (H) and contiguous (W) dimensions

    +

    Member Typedef Documentation

    + +

    ◆ Shape

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + +
    typedef platform::conditional< kKstrided, Shape<1, ThreadBlockTile::kD, GetExtent<Usage, ThreadBlockTile>::kExtent>, Shape<1, GetExtent<Usage, ThreadBlockTile>::kExtent, ThreadBlockTile::kD> >::type cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::Shape
    +
    + +
    +
    + +

    ◆ ThreadBlockTile

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + +
    typedef ThreadBlockTile_ cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::ThreadBlockTile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kKstrided

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    bool const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kKstrided = (kUsage == GemmOperand::kA ^ kLayout == MatrixLayout::kRowMajor)
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kLayout = Layout
    +
    +static
    +
    + +
    +
    + +

    ◆ kUsage

    + +
    +
    +
    +template<typename ThreadBlockTile_ , GemmOperand::Kind Usage, MatrixLayout::Kind Layout>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmMultiplicandTraits< ThreadBlockTile_, Usage, Layout >::kUsage = Usage
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html new file mode 100644 index 0000000000..03950b5a98 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >, including all inherited members.

    + + +
    Congruouscutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html new file mode 100644 index 0000000000..39721d9540 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmOperandTraitsAb.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ > Struct Template Reference
    +
    +
    + +

    Helper to describe attributes of GEMM matrix operands. +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const bool Congruous
     
    +

    Member Data Documentation

    + +

    ◆ Congruous

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_>
    + + + + + +
    + + + + +
    const bool cutlass::gemm::GemmOperandTraitsAb< kOperand_, kLayout_ >::Congruous
    +
    +static
    +
    +Initial value:
    =
    (kOperand_ == GemmOperand::kA ^ kLayout_ == MatrixLayout::kRowMajor)
    +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html new file mode 100644 index 0000000000..5e175f55eb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kMemorySpacecutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerLdscutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html new file mode 100644 index 0000000000..8851f0a929 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits.html @@ -0,0 +1,463 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarpIterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kScalarsPerLds, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kA
     
    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kScalarsPerLds, 0> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kA
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..7d022958ae --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..03c381f809 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorATraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html new file mode 100644 index 0000000000..e964f6ecb9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kMemorySpacecutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerLdscutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html new file mode 100644 index 0000000000..a075d3ca4c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits.html @@ -0,0 +1,463 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kScalarsPerLds, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kB
     
    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDS (shared-memory load). More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kScalarsPerLds, 0> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kB
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..ed251e5385 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..0814240e40 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorBTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html new file mode 100644 index 0000000000..0629b20968 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    kIterationsDcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsHcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsInHPerWarpcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerLdscutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerRowcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerThreadcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kThreadscutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    OutputTile typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Pointer typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html new file mode 100644 index 0000000000..1cf22fdf8d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits.html @@ -0,0 +1,504 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerLds, kScalarsPerLdsTile
     The tile. More...
     
    typedef Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kScalarsPerLdsIterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kScalarsPerLdsDelta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerLds = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
     
    static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
     
    static int const kIterationsD = kIterationsInHPerWarp / kIterationsH
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kScalarsPerLds, kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kIterationsD

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsD = kIterationsInHPerWarp / kIterationsH
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsH

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsInHPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerLds

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..af32d15419 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..f055988fd4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadIteratorDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html new file mode 100644 index 0000000000..a317e544fa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html new file mode 100644 index 0000000000..27c32f35e9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits.html @@ -0,0 +1,482 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarpIterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kA
     
    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kA
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..3e308db63e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..0731bce879 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileATraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileATraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html new file mode 100644 index 0000000000..782aa8415e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kOperandcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kThreadsPerWarpcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    kWarpscutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithoutSkew_ typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    TileWithSkew typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html new file mode 100644 index 0000000000..097ce43efa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits.html @@ -0,0 +1,482 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef Shape< kStages_, OutputTile_::kD/InstructionShape_::kD, GetExtent< kOperand, OutputTile_ >::kExtent *InstructionShape_::kD > TileWithoutSkew_
     The tile without skew. More...
     
    typedef Shape< kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW+kSkew_ > TileWithSkew
     The tile with skew. More...
     
    typedef ReshapeTile< TileWithoutSkew_, kScalarsPerLds_ >::Tile TileWithoutSkew
     The tile without skew after reshaping. More...
     
    typedef ReshapeTile< TileWithSkew, kScalarsPerLds_ >::Tile Tile
     The tile. More...
     
    typedef Warps_ Warps
     The number of warps. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in a warp. More...
     
    typedef Shape< 1, 1, TileWithoutSkew::kW/kWarps/kThreadsPerWarp > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< TileWithSkew::kW, 0, kWarps *kThreadsPerWarp *kAccessSize, 0 > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static GemmOperand::Kind const kOperand = GemmOperand::kB
     
    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kWarps = GetExtent<kOperand, Warps>::kExtent
     The number of warps. More...
     
    static int const kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
     The number of threads in one dimension of the warp. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<TileWithSkew::kW, 0, kWarps * kThreadsPerWarp * kAccessSize, 0> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, TileWithoutSkew::kW / kWarps / kThreadsPerWarp > cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithSkew, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ReshapeTile<TileWithoutSkew_, kScalarsPerLds_>::Tile cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    + +

    ◆ TileWithoutSkew_

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, OutputTile_::kD / InstructionShape_::kD, GetExtent<kOperand, OutputTile_>::kExtent * InstructionShape_::kD> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithoutSkew_
    +
    + +
    +
    + +

    ◆ TileWithSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kStages_, TileWithoutSkew_::kH, TileWithoutSkew_::kW + kSkew_> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::TileWithSkew
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kOperand

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    GemmOperand::Kind const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kOperand = GemmOperand::kB
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kThreadsPerWarp = GetExtent<kOperand, ThreadsPerWarp>::kExtent
    +
    +static
    +
    + +
    +
    + +

    ◆ kWarps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::kWarps = GetExtent<kOperand, Warps>::kExtent
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..387441df03 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..07f462ac6c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileBTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , typename InstructionShape_ , int kStages_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileBTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, InstructionShape_, kStages_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html new file mode 100644 index 0000000000..afc22fe8e5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsDcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsHcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kIterationsInHPerWarpcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerRowcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kScalarsPerThreadcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    kThreadscutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >static
    OutputTile typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Pointer typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html new file mode 100644 index 0000000000..043d8c3ae2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits.html @@ -0,0 +1,523 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
     The tile. More...
     
    typedef Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kAccessSize > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kAccessSize = kScalarsPerLds_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
     
    static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
     
    static int const kIterationsD = kIterationsInHPerWarp / kIterationsH
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kAccessSize = kScalarsPerLds_
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsD

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsD = kIterationsInHPerWarp / kIterationsH
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsH

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
    +
    +static
    +
    + +
    +
    + +

    ◆ kIterationsInHPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..1a9ffe26c4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..d68dda08ba --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedLoadTileDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedLoadTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html new file mode 100644 index 0000000000..f757540dbf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits-members.html @@ -0,0 +1,100 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html new file mode 100644 index 0000000000..ca8c11acbf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits.html @@ -0,0 +1,325 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kScalarsPerSts > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = 0
     The skew. More...
     
    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per STS (shared-memory store). More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / Threads::kC / kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kSkew = 0
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..60774aa5d8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..d79437c0ed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html new file mode 100644 index 0000000000..7e7090f2d6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Delta typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Iterations typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    kMemorySpace cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerRow cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerSts cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kScalarsPerThread cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kSkew cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    kThreads cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > static
    OutputTile typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Pointer typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Scalar typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ThreadsPerWarp typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Tile typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Warps typedef cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html new file mode 100644 index 0000000000..27f54141f7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits.html @@ -0,0 +1,426 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerSts, kScalarsPerSts > Tile
     The tile. More...
     
    typedef Shape< 1, 1, kScalarsPerThread/kScalarsPerSts > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per STS (shared-memory store). More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, kScalarsPerThread / kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kScalarsPerSts, kScalarsPerSts> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..f143cd655b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..d9a6a9a30a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreIteratorDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html new file mode 100644 index 0000000000..eb2702a3f7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html new file mode 100644 index 0000000000..f755f52fca --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Tile::kC, kScalarsPerSts_ > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/Threads::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, Threads::kH *ShapeCount< Tile >::kWc, Threads::kW *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = 0
     The skew. More...
     
    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per STS (shared-memory store). More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / Threads::kC / kAccessSize> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Tile::kC, kScalarsPerSts_> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::Tile
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::kSkew = 0
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..6157a4d397 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..876eea666a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html new file mode 100644 index 0000000000..5749940cee --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits-members.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    kScalarsPerRowcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    kScalarsPerThreadcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    kThreadscutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >static
    OutputTile typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Pointer typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    ThreadsPerWarp typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    Warps typedefcutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html new file mode 100644 index 0000000000..a5e0b8d6ce --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits.html @@ -0,0 +1,445 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef OutputTile_ OutputTile
     The dimension of the output tile. More...
     
    typedef Warps_ Warps
     The warps in the tile. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The threads in the warps. More...
     
    typedef Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
     The tile. More...
     
    typedef Shape< 1, 1, kScalarsPerThread/kAccessSize > Iterations
     The number of iterations needed to store the tile. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per LDG/STG. More...
     
    static int const kSkew = kSkew_
     The skew. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
     The number of scalars per thread. More...
     
    static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
     The number of threads. More...
     
    static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
     The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 1, kScalarsPerThread / kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef OutputTile_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ Warps

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + +
    typedef Warps_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerRow

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerThread

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    + +

    ◆ kThreads

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..8a28c51900 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..673f9afb92 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreTileDTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html new file mode 100644 index 0000000000..65870e112f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits-members.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    kMemorySpacecutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    kScalarsPerStscutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Threads typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ThreadsStrides typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Tile typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html new file mode 100644 index 0000000000..bd68edc4fe --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits.html @@ -0,0 +1,344 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef nv_std::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
     The tile without skews. More...
     
    typedef ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    typedef Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kScalarsPerSts > Delta
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = kSkew_
     The skew. More...
     
    static int const kScalarsPerSts = kScalarsPerSts_
     The number of scalars per STS. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kScalarsPerSts> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, kScalarsPerSts_, ShapeCount<Tile>::kHwc / Threads::kW> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadsStrides
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Shape<Tile_::kD, Tile_::kH, Tile_::kW + kSkew_>, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsPerSts

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kScalarsPerSts = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..1eda0c8653 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..5cd71d1b77 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewIteratorAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreWithSkewIteratorAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html new file mode 100644 index 0000000000..0a64b450cb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >, including all inherited members.

    + + + + + + + + + + + + + +
    Delta typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Iterations typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    kAccessSizecutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    kMemorySpacecutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    kSkewcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >static
    Pointer typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Scalar typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    Threads typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    ThreadsStrides typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >protected
    Tile typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    TileWithoutSkew typedefcutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html new file mode 100644 index 0000000000..ed1fb90bd4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits.html @@ -0,0 +1,375 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Classes

    struct  ThreadOffset
     
    + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kScalarsPerSts_ >::Tile TileWithoutSkew
     The tile without skews. More...
     
    typedef ReshapeTile< Shape< Tile_::kD, Tile_::kH, Tile_::kW+kSkew_ >, kScalarsPerSts_ >::Tile Tile
     The tile. More...
     
    typedef Threads_ Threads
     The threads. More...
     
    typedef Shape< 1, TileWithoutSkew::kH/Threads::kW, TileWithoutSkew::kW/Threads::kH > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, ShapeCount< Tile >::kWc, Threads::kH *kAccessSize > ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    + + + + + + + + + + +

    +Static Public Attributes

    static int const kSkew = kSkew_
     The skew. More...
     
    static int const kAccessSize = kScalarsPerSts_
     The number of scalars per STS. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
     The memory space. More...
     
    + + + + +

    +Protected Types

    typedef Shape< 0, kScalarsPerSts_, ShapeCount< Tile >::kHwc/Threads::kW > ThreadsStrides
     The strides to compute the base position of the thread. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kAccessSize> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Delta
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<0, ShapeCount<Tile>::kWc, Threads::kH * kAccessSize> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Shape<1, TileWithoutSkew::kH / Threads::kW, TileWithoutSkew::kW / Threads::kH> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Iterations
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Scalar_* cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Scalar
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef Threads_ cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsStrides

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    typedef Shape<0, kScalarsPerSts_, ShapeCount<Tile>::kHwc / Threads::kW> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadsStrides
    +
    +protected
    +
    + +
    +
    + +

    ◆ Tile

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Shape<Tile_::kD, Tile_::kH, Tile_::kW + kSkew_>, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::Tile
    +
    + +
    +
    + +

    ◆ TileWithoutSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + +
    typedef ReshapeTile<Tile_, kScalarsPerSts_>::Tile cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::TileWithoutSkew
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kAccessSize

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kAccessSize = kScalarsPerSts_
    +
    +static
    +
    + +
    +
    + +

    ◆ kMemorySpace

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    MemorySpace::Kind const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
    +
    +static
    +
    + +
    +
    + +

    ◆ kSkew

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..1298ee529a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..cc55e56e80 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmSharedStoreWithSkewTileAbTraits_1_1ThreadOffset.html @@ -0,0 +1,129 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset Struct Reference
    +
    +
    + +

    #include <gemm_shared_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kScalarsPerSts_, int kSkew_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::GemmSharedStoreWithSkewTileAbTraits< Scalar_, Tile_, Threads_, kScalarsPerSts_, kSkew_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html new file mode 100644 index 0000000000..8c1ffaf9e4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< Kind, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..9bca290fbb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..7affa6ef61 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,236 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^N. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout = MatrixLayout::kColumnMajor
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..9ce259eb1e218a474520353a3cbaf3ddbdfc43ad GIT binary patch literal 1679 zcmbuAeKga19LIlh(8ZFIiYPs7Zmz7WOrBR^j6$V{8;K_Fu*BMEGo(BnEEcQCEgdQ; z4Q;HMbs9n*iabAz$s~KQR+edY=blsN-oNknoX_|B{+{#xem?)aU+0_U@9PQD-J}Zu z00`;jeiQ&SWNPidT2s9{LZfrlhq3>m01uT)rIzzwKR$Sc%Tud=>gCIqr>YZ9sIRLp z9rZm1s7))YuRUN70BCPPx*rHg&=8eOlIMQYzXrGHX;kgtjajTk9qJLGyWbU77(+nX z#U?0KdBM>Q-j)#1PCySp(rk5F)5;z8Lw9v*#D${Qb#9%biMS<_-{TG5PNmmZH2Ye-)N*~ih{~QLA-40^bNZd_ z2M30yj(j~pZL0NM+dZ6l*oFd%%#U1V{Kwd z`@@r52h!l(8}7l9pFGvr!h)Hnm{7lx0{{HT{g1MxSgCJ3~nKq4daUHYVV9TY3S!K%sDZmtBS&C53NHyy$~IU*GcYuRE&0wVP`r{mPjFM zx$#ObS+%V@eXKy@C1VY{4mdv&RSWj=$K$(6#GF+4v3Uk#1drTi98Xu$lf--beswEb zocxB65sZ1Jh#K8N8*7tNf#2GF1XAf;3Fw0jb9cqza*TgA&(J^0(TM-9-%k`I^b8?k z;80HxcYLDuh~3nFEHA&K%{RDG@U(a+(99n-9ohTfun`T_+D3qyK4)9Yn{tC$uIt|~ z9KojWCi-xIq*-UjqHvp1Ecvp4@8qUy%=^tVy<8Jp?yb@IZbR;&s{lenLN}KUENi)L zXy+B0O&qSL&U_tSLj&fMxjklEH&O)v^JfARqk_FH^~wtbHzXHZ(TI2UX(B$OG%U^vib5n6; zrIguHV-ttko$eqp@dM2T88IdHj7c()z+aUubKW<=MvJ*>c4p1OmnUh&%l)+8hLY(I z7UYoC^s~IS^EdjyS6wdT3s(NGXEStJ+j#oJmn8XoqDL)Fi874LSM^4Xd$QRE9y=*&o#B`;qPPbXDf_8&oYMl`WS ze=UiPo0GyPZqSu2^Dh{j{%+D^MOdaS+>G~Uu)!3O+j=5jc#E3jFQY!e@)6HCk{O~Z zp+jU9>4%<+5;^hFE}kB}u@N87!B@>iEyX%Snk{JG&8;!fDO#e2k@25T!(v1oSR%**!AVbpUrC|0o1H!lH`6PKffmkO9 z<>Y~{KLkN$&nI?qGf)Pf^1#Y}FalR_v*pgi-V|+T%ueJpJ_@>A^?~xmBB*DXfVo+w zg;a1t(NLtCq?={x{QpR1BaFwu1P(GVZ&FnNXj?b~kri=U{d)jN4`27HgCVJZ1LBBQ AXaE2J literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html 
b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..09585beb23 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..809d799b13 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,263 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsA/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarA cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsA / GlobalTileTraits::Threads::kW * kScalarsIn4B> cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout = MatrixLayout::kRowMajor
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsIn4B

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..556e57b54b60b0e0cbfcae4e62894af144c5cfef GIT binary patch literal 1661 zcmchYdpy%?9LIl)sKky^=DM6vPVPA_D~2PsuozCRZEi`@DP{?CG`C!G+0jxOry}WO zl2~Fh$|aYTRV1Mk+Au~|V{4qL^Xm2LkMr+&Ua#-xc|Wh``^WRg^M0Nz50s1ICiP7K z04O36PG|rCair|AK}MP?QSMOb1o3e5KCrsFD#iJa+?#E21ycG_OeWK}HvU)Xasv*H z@&cqyYvX0Nw>bdFZALmdc*lcgDSyX2+PygqKCqXt>cY^MGfcbthehLzC+x&{ZMrT# zU2v%g?T;|1sB&263lu|KBT|-YadO4bZxBjp8Ed&A%N9bJ0PU%jf<|?-MtB`ub1ZX3 z44!mv89FNuX^Swo9=C?r=O|ps*uF0g;4Ix|ol)f^C&1om9h%X+YUC9TnQ;eL%Hdn0 z-V#A+4{jPX#QR>$$f1YeCtS&6lVQ=_eZmBt;gZO*4E&_5zP}_UG01W>If^*&nL*Pn zroV)w_>o1tQ)5B7`L^WTQuRdD+p4 z7P9&_=BqM%O0PCQJ#IL7k+t%=`0Oj2XoQ8@L*tP^9IYLf!zg-_uT$5{@!V$78o1u> zpjTzAK&E)|rzuBHOh##KptD|!EWQzh4~d9l^Jk1k_>p*)RDaNzvO?1a%4P4ld#@AQE>@N;%eSVofod=5y?c7X|vc1#bu@0^QM z5`8k8eS}qo|0EH}@`E=H#DRrAm|0C=* z7WmnVZO>;G&q9^O^Cqi&{_PU>s~G>dI|b(tkHGK8?&_MVC+x%IkTU31om#g$&aoIp zeNU9|IjlJJ_i4j#04sUi{?gaD5ph?GnpgB#X0D?@uHC{%jQA(32f zf*+@qE58JuZ*&`-w&5BBaOb%_VyJ!QAb+uIC$l9lzv2V(GH5i-)hzi14YVCdwk5NG z#n7qErtojTh(j4vq!*#Dr~$t#VgLoeYi-|_!5}{a>b_$55l0!fyo$X{3=^$}O*W)} z-gmP9ANY)<(4vP|AdH5!e5D)dK+^AE`;nvv_&)48I1EW3!_%fp$|W@${YNj5J}@#p zMPGfnRIZbxAJlNTRky00*@Iz}RE3t~#McdQTPMnuGH4+8bM1wa_IfJp@V)XdGD)r{sYU*T zwwp^lv$?;ql#AWXy5ag@WDF73^Fk%X7DwW3vdKc$szK|0N{Lwa?BG;L?X*#X95H(i zcQOy(%oX$JyB2ckmhk=|jK*GYlln1)Mb>{9qmJtI(VEhfAk6>L76)=jNr!x>Wh7q` zFNPp~{bu@Vs-@P*2^^v6^l@Q;NYZXIcPXk46~PxfakeAPd*n5ylTgnUV1qZxrr#*F zSwFi~;S9^~8<7ezLf}Eyfj+C` zFx#3$MmJGKpQ09rRI5x<5FE`VslAZF>>J$RyB-|>ywEETLlT1R_;tZ$qRCZB&gQ}Ck~S2;@N?u(#}rDB=k{a!d4VAnf{wAo2V;+-o=s2rLXpPu~5Azy`^o^ b>RpmwJriafr|+mI{VM?E0hANf5lj3B$izxg literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html 
b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html new file mode 100644 index 0000000000..60b2921a45 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< Kind, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..be5a0a9b1e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..fc90114c00 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,263 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsB/GlobalTileTraits::Threads::kW *kScalarsIn4B > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128 / sizeof(MultiplyAddScalar) / GemmConfig_::kScalarsPerStsB / GlobalTileTraits::Threads::kW * kScalarsIn4B> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kLayout = MatrixLayout::kColumnMajor
    +
    +static
    +
    + +
    +
    + +

    ◆ kScalarsIn4B

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..4dd45b738da49168b71b37de8c8230cea7599749 GIT binary patch literal 1689 zcmb`IdsvcJ9LHa*&0D6+mR&WxWX{aY#7Yx0or0!?W{Rn+R;v(LkEv)5Txx1-UeJkM zm~GzHyr6SRn{pXSO+`r4Qd9`tv=Z{ghP(pm-nD1{?Af0?&vVZCp7ZCz!KEFs1>9X!|HA4^Z79? zlxTQqas?M23Babm+Hh~wUI2J^CnoT46a}GTo~F<4T$6*Ao=9GCm|s zW42_1D)^+EIq!Z0w9V~2_HHn59ko}ETJmVC&pX}lR5xLYj*cEySI9k|d{z=p?A;^h zF-vt^Ww~z4Zugu@u8)HF?x7Aw=Yiwf?6Z&1$YpRREi|lSPH^j$7)LNZ7y8l53UrPE8LXM2k zrY#E{<7ZwZ*55kP+7MaqJuz;*+%(L}O~Z5C4L8_nCk^}ea$N7S)a}D*)@**~Yy1+y zlL~tJnM6QRWK%9HZBwvNbOO;)-rx29vQ20I@x-sy`l$2@rSWX_Mbaft-@!|z`PX>T z#hQsvvmH3xVY?LElRd1-D|qXDxd}{*E4N$qEeQ~6Ea{MsD5hW9erxkP%GR{`YinZ1k3Z5)OvxHdC{l6C_yxI!^I?6{L7g$UW_RRt;< zV4c8%z6#Fb6-Ly|r==KEtVzR*RneBk0JMV4H8#m#Z@Yavj*^uXqCgl;#z8nNrVNMO z3FHWE{EfD6fFl%0hy5KPfX(@N{FtRQtj4k z;q95rA$uR(H2GA=Xr=;UVUS(c+~s-!EmmAXIpVC>0HH}T#WfFF*S6_f?*?wkTXG4_Se5@ zI@^x1LoNQT`=L{z^>V$S4X|I zIXr@fpkPMNne!%9#P`u+th@HjCK+2Nyve}w{u7?C*b*fkWqA@>IP(m~r?OJ5-=D5j zIls!kI76`UoxkW164aMcOFQQ`{yc<5)&w($k{afcGf-2P9~S*X&m>IG>e#pRz}L!P zXz-{{s;FP^riq2pFWu^fBLty?o-z8eYSW0@p@XH+sT|4lYfcy@!&v6l>Eq|v^Qd&1 z{)ds@21VEXB2^6iZ<6Rk3gHdyDa@eJUs-RUsC-_1ha<<7(ZG|va&kdpRzj;m`g$vu xPRDlw&D=5YlOBrm&ot)|$I3_AiRQU9;05)Q2HKIR-S9sGV1mK}n*u(%_9rI3S_=RG literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..04d0fed09e --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..d2976060d6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,236 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^T. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^T. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ MultiplyAddScalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::MultiplyAdd::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::MultiplyAddScalar
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmConfig_::ScalarB cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB> cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kLayout = MatrixLayout::kRowMajor
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..f291cad7a3470d5eb65161a5278dcaa23e444e27 GIT binary patch literal 1637 zcmchYc{J2(7{`AmgGodqWG&gZn8QVhYl~5qY{`->$(1!U*FIO2Y#EFta>L+~vDL_S z`w_;zl`O+FB_U0iYYa(qf7iLEb2|6``=0ZB-_LvA=a2V~=X2f^TWfQ^gJK5(0N_Jg znA!mV2MuCdo&!*BH9K8{Hc{IP4rXjN8^U$waz$6feTeo81VQHpj1zR^3AeMh2cV{Z z*5rT#3IO1nXj3Bx3uAbAeh({|C zl5*?{k+KQ!gkueyT=}BsJrh#CgQmdt?jo|Qa!ncp3`^owZ;%$|s|xoX#bqwAG^>5r zD4g7)rD|8}dqPn_CNDl&_{>c}VY>>f6Icq3lJU+oJgU2$qla>c_)~{d>KU^iFm`Ja za&1?zJ&G}uJ;Qi9uohN8a%Oe78O4jxUygmQn!l>z5qCv8!;pO*`}*r*v`~K8qE#_Z z(E@#B(%@aGUoRDlQ{=9Rv_g?9)#!x;u+%9&B5ROrEG*k(aR|TjZD3jI3rTEN4?B28 zStpdkoj_%WXoU_rsRj1VI?ja(W_$6fKN_^ZKjb9Zd|U8At(ncwAvw|5dFsbhlFGwR zEp>j6U*>{zD@~I+-jB;oW#iiKtBv2kWnpCMeK#z$aBt+AcJ%WqRU=^LY@q2QX%ucy zz1A|IuvDdrbsx?--OvORxp?8PpE9gcBaLdG>nYM5|In#ksY(X}myy`BNAf4-#Sg}e zOz>fATqB+s#yX`EO z$rVIV1d)BkSJ@nxx%N&k)r99*n_LDumP4wmMVRHP1;RkXbU6*M+ZlG?LfUr-#@bu@ zpCIH^aDsJRB2ZHibU&0@S4YqQpX_6BY+e`8rbIf1?ADaIIWX-5T^w_;D_LXRZii`+1OPce|Aeax$UCfuF0O1--?gJAhj8xjy8Fc z#F;XZjmD9WcN2O>ie%F_Zo&Ce?n<&9Xf*o|eYTT3Q~?a_wL*5eK$*ZL;ouEdICZoy zBc_JY=0>QUj6J0@=FxK_)C=<}Xd?HNVclVJ37cRX@Qra+d1#!`Zy=~krWDu2>IEgI zJrs*ZSe8mWp4IM`ctp}NnNHaEBs41(Oj1!QAe48a-gqG9U$iA#(;D*ztlW$)RkI8w ztcs$%B_K_>*81Cz{MXe`;@o|G0V;Q-_<-q0Bs|E%{XN8DFeSy*X~^mux<`f!|6pXhyWh?)9Z2 z)1$7NTH*>P=V|h;6x%O%5~8LMUmLUzl;_n2j1!l(VBBi0)|(nLN#IGmR9h$Al-Js~ z_U*iItQUq_x;9LU!*g?VkV6+n;p#6t^m2_D8kZGA^zQiJTkXSrxYm3;OFog`Otrm> z->i4DIVNc-MpSw6-I9p^Sds%-QuVJTDCN8xK6p#SUZ??!Dy%-jkNWzu#2a8)yxW_2> zC@^!&?}Q9&> + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html new file mode 100644 index 0000000000..7153c8237a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits.html @@ -0,0 +1,568 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  GlobalLoadStream
     Assemble the global load streams for A/B. More...
     
    struct  MainLoopSharedStorage
     
    struct  Params
     The params. More...
     
    struct  SharedLoadStream
     Assemble the shared load stream for A/B. More...
     
    union  SharedStorage
     The storage in shared memory. More...
     
    union  StreamSharedStorage
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef GlobalLoadStreamA_ GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GlobalLoadStreamA_::Scalar ScalarA
     The scalar for A. More...
     
    typedef GlobalLoadStreamB_ GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef GlobalLoadStreamB_::Scalar ScalarB
     The scalar for B. More...
     
    typedef SharedLoadStreamA_ SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SharedLoadStreamB_ SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Epilogue_ Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef BlockSwizzle_ BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators_ ClearAccumulators
     Clear the accumulators. More...
     
    + + + + + + + +

    +Static Public Member Functions

    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayoutA = GlobalLoadStreamA::kLayout
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB = GlobalLoadStreamB::kLayout
     The layout of B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ BlockSwizzle

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef BlockSwizzle_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::BlockSwizzle
    +
    + +
    +
    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef ClearAccumulators_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Index_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Index
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ OutputTile

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GemmConfig::OutputTile cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::OutputTile
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA_::Scalar cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB_::Scalar cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue::ScalarC cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ScalarD

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef Epilogue::ScalarD cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::ScalarD
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef SharedLoadStreamA_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef SharedLoadStreamB_ cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreStorageA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamA::SharedStoreStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStoreStorageA
    +
    + +
    +
    + +

    ◆ SharedStoreStorageB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    typedef GlobalLoadStreamB::SharedStoreStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStoreStorageB
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ shared_load_fence()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::shared_load_fence (bool in_loop)
    +
    +inline static
    +
    + +
    +
    + +

    ◆ shared_store_fence()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::shared_store_fence (bool in_loop)
    +
    +inline static
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayoutA

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::kLayoutA = GlobalLoadStreamA::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ kLayoutB

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::kLayoutB = GlobalLoadStreamB::kLayout
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html new file mode 100644 index 0000000000..5f89b801b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream-members.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream, including all inherited members.

    + + + + + + + +
    commit()cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    copy()cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    GlobalLoadStream(Params const &params, SharedStorage &shared_storage, dim3 const &block)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    residue(Index k, bool skip_clear=false)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStreaminline
    stream_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
    stream_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html new file mode 100644 index 0000000000..6343fabc35 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1GlobalLoadStream.html @@ -0,0 +1,295 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream Struct Reference
    +
    +
    + +

    Assemble the global load streams for A/B. +

    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStream (Params const &params, SharedStorage &shared_storage, dim3 const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copies from shared memory to registers. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + +

    +Public Attributes

    GlobalLoadStreamA stream_a
     The stream for A. More...
     
    GlobalLoadStreamB stream_b
     The stream for B. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStream()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::GlobalLoadStream (Params const & params,
    SharedStorage & shared_storage,
    dim3 const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::residue (Index k,
    bool skip_clear = false 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamA cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamB cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::GlobalLoadStream::stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html new file mode 100644 index 0000000000..5f36220360 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html new file mode 100644 index 0000000000..95f9a8291c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1MainLoopSharedStorage.html @@ -0,0 +1,154 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage Struct Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + +

    +Public Attributes

    StreamSharedStorage< GlobalLoadStreamA, SharedLoadStreamA > stream_a
     
    StreamSharedStorage< GlobalLoadStreamB, SharedLoadStreamB > stream_b
     
    ClearAccumulators::SharedStorage clear
     
    +

    Member Data Documentation

    + +

    ◆ clear

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    ClearAccumulators::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::clear
    +
    + +
    +
    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    StreamSharedStorage<GlobalLoadStreamA, SharedLoadStreamA> cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    StreamSharedStorage<GlobalLoadStreamB, SharedLoadStreamB> cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::MainLoopSharedStorage::stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html new file mode 100644 index 0000000000..05de1ce4e7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params-members.html @@ -0,0 +1,99 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params, including all inherited members.

    + + + + + + + + + + +
    epiloguecutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    global_stream_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    global_stream_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    initialize(GemmDesc_ const &desc)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Paramsinline
    kcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    mcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    ncutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    shared_stream_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    shared_stream_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html new file mode 100644 index 0000000000..ffeb872b17 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1Params.html @@ -0,0 +1,292 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Initialize the parameters. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    Index k
     
    GlobalLoadStreamA::Params global_stream_a
     The params for the A stream. More...
     
    GlobalLoadStreamB::Params global_stream_b
     The params for the B stream. More...
     
    SharedLoadStreamA::Params shared_stream_a
     The params for the A stream from shared memory. More...
     
    SharedLoadStreamB::Params shared_stream_b
     The params for the B stream from shared memory. More...
     
    Epilogue::Params epilogue
     The params for the epilogue. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Epilogue::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::epilogue
    +
    + +
    +
    + +

    ◆ global_stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamA::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::global_stream_a
    +
    + +
    +
    + +

    ◆ global_stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    GlobalLoadStreamB::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::global_stream_b
    +
    + +
    +
    + +

    ◆ k

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::k
    +
    + +
    +
    + +

    ◆ m

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::m
    +
    + +
    +
    + +

    ◆ n

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Index cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::n
    +
    + +
    +
    + +

    ◆ shared_stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::shared_stream_a
    +
    + +
    +
    + +

    ◆ shared_stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::Params cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::Params::shared_stream_b
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html new file mode 100644 index 0000000000..64969b354d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream, including all inherited members.

    + + + + + + + + + + + + + +
    commit(int step)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    copy(int step)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    fetched_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    fetched_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    fragment_a(int step) constcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    fragment_b(int step) constcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    inc_stage()cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    SharedLoadStream(Params const &params, SharedStorage &shared_storage)cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStreaminline
    stream_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    stream_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    transformed_acutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    transformed_bcutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html new file mode 100644 index 0000000000..7735b012b8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GemmTraits_1_1SharedLoadStream.html @@ -0,0 +1,418 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream Struct Reference
    +
    +
    + +

    Assemble the shared load stream for A/B. +

    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE SharedLoadStream (Params const &params, SharedStorage &shared_storage)
     Ctor. More...
     
    CUTLASS_DEVICE void copy (int step)
     Trigger the copies from shared memory to registers. More...
     
    CUTLASS_DEVICE void commit (int step)
     Commit the data. More...
     
    CUTLASS_DEVICE SharedLoadStreamA::Fragment const & fragment_a (int step) const
     The fragment A. More...
     
    CUTLASS_DEVICE SharedLoadStreamB::Fragment const & fragment_b (int step) const
     The fragment B. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    SharedLoadStreamA stream_a
     The stream for A. More...
     
    SharedLoadStreamA::FetchedFragment fetched_a [2]
     The fragments to fetch A. More...
     
    SharedLoadStreamA::TransformedFragment transformed_a [2]
     The fragments to transform A. More...
     
    SharedLoadStreamB stream_b
     The stream for B. More...
     
    SharedLoadStreamB::FetchedFragment fetched_b [2]
     The fragments to fetch B. More...
     
    SharedLoadStreamB::TransformedFragment transformed_b [2]
     The fragments to transform B. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedLoadStream()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::SharedLoadStream (Params const & params,
    SharedStorageshared_storage 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::commit (int step)
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::copy (int step)
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment_a()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE SharedLoadStreamA::Fragment const& cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fragment_a (int step) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment_b()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE SharedLoadStreamB::Fragment const& cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fragment_b (int step) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::inc_stage ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ fetched_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::FetchedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fetched_a[2]
    +
    + +
    +
    + +

    ◆ fetched_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::FetchedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::fetched_b[2]
    +
    + +
    +
    + +

    ◆ stream_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::stream_a
    +
    + +
    +
    + +

    ◆ stream_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::stream_b
    +
    + +
    +
    + +

    ◆ transformed_a

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamA::TransformedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::transformed_a[2]
    +
    + +
    +
    + +

    ◆ transformed_b

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    SharedLoadStreamB::TransformedFragment cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedLoadStream::transformed_b[2]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html new file mode 100644 index 0000000000..28de78f121 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::Gemm< GemmTraits_ >::Params Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::Gemm< GemmTraits_ >::Params, including all inherited members.

    + + +
    initialize(Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd)cutlass::gemm::Gemm< GemmTraits_ >::Paramsinline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html new file mode 100644 index 0000000000..63d08ff998 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.html @@ -0,0 +1,217 @@ + + + + + + + +Cutlass: cutlass::gemm::Gemm< GemmTraits_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::Gemm< GemmTraits_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm.h>

    +
    +Inheritance diagram for cutlass::gemm::Gemm< GemmTraits_ >::Params:
    +
    +
    + + + +
    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Index m, Index n, Index k, ScalarEpilogue alpha, ScalarA const *d_a, Index lda, ScalarB const *d_b, Index ldb, ScalarEpilogue beta, ScalarC const *d_c, Index ldc, ScalarD *d_d, Index ldd)
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename GemmTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::Gemm< GemmTraits_ >::Params::initialize (Index m,
    Index n,
    Index k,
    ScalarEpilogue alpha,
    ScalarA const * d_a,
    Index lda,
    ScalarB const * d_b,
    Index ldb,
    ScalarEpilogue beta,
    ScalarC const * d_c,
    Index ldc,
    ScalarDd_d,
    Index ldd 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png b/docs/generated-html/structcutlass_1_1gemm_1_1Gemm_1_1Params.png new file mode 100644 index 0000000000000000000000000000000000000000..6e8ee648b676f15ef6a830f006394b194e999d66 GIT binary patch literal 788 zcmeAS@N?(olHy`uVBq!ia0y~yU=#$h12~w0B&X%ZgFs3mz$e7@|Ns9$=7+B@mK`dc z0AzvjfddC3HdcfIxf~@ye!&btMIdnXREQA+1Jhzp7srqa#)#c3@^G|Gmp}gfq4VWr&tJjw@4IVFWqau&^Zc5J ztMSz{#div7?ys8|GVLArli6iw8I|I%x$la-JXg2>F-sSq@2XAerUuM|$XSK^b zZ<)B~4qt9vW_G#fv$%~(zXZJztcF$*@Z;NkFnR=yWammuS<$n8rM7_E!gfIQI(Gevxi2EsqAliZ`99Gr)o%a!$D$JpR~}tAljC~0#)Z}KpIqmN z{?6MX{IdC4`B|^8ThAqY&h4D;B6T5hW{K&hU(M(47aHfeRpwuvc5KfyaSt(V=fsCo zRl|Or=#ib`r+fQv?B*L=0%E59Y<$71G`seR#l)$0m*#3GAD4XZyLM05of(z&*8(Cg zn$#Qfz1SlEcK0$lsoUvi|FqB9m0@3>eVt*?Q~&3TF*oN6zO*(wAo?fgMD$Unf(!E3 xPWb=HZs7j)IhOf@y%loQymRr=RL#3??>S*bmy@{r9$*q?@O1TaS?83{1OSjPVB-J) literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html new file mode 100644 index 0000000000..c955db6565 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< kOperand_, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< kOperand_, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html new file mode 100644 index 0000000000..725806f650 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >, including all inherited members.

    + + +
    kExtentcutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html new file mode 100644 index 0000000000..4e613ddc9a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kA_00_01Tile___01_4.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const int kExtent = Tile_::kW
     
    +

    Member Data Documentation

    + +

    ◆ kExtent

    + +
    +
    +
    +template<typename Tile_ >
    + + + + + +
    + + + + +
    const int cutlass::gemm::GetExtent< GemmOperand::kA, Tile_ >::kExtent = Tile_::kW
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html new file mode 100644 index 0000000000..d17a7e7c5f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >, including all inherited members.

    + + +
    kExtentcutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html new file mode 100644 index 0000000000..172db999e2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GetExtent_3_01GemmOperand_1_1kB_00_01Tile___01_4.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Attributes

    static const int kExtent = Tile_::kH
     
    +

    Member Data Documentation

    + +

    ◆ kExtent

    + +
    +
    +
    +template<typename Tile_ >
    + + + + + +
    + + + + +
    const int cutlass::gemm::GetExtent< GemmOperand::kB, Tile_ >::kExtent = Tile_::kH
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html new file mode 100644 index 0000000000..f3b227eb8c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream-members.html @@ -0,0 +1,112 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >
    commit()cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    copy()cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    fetched_fragmentcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    FetchedFragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Fragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    GlobalLoadStream(typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block)cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >inline
    GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    Index typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    kLayoutcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >static
    load_iteratorcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Pointer typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    residue(Index k, bool skip_clear=false)cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    Scalar typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    SharedStoreStorage typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    store_iteratorcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    StoreIterator typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformed_fragmentcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    TransformedFragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Transformer typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformercutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html new file mode 100644 index 0000000000..99cbcad5da --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.html @@ -0,0 +1,253 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >:
    +
    +
    + + +cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    typedef LoadIterator_ LoadIterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef StoreIterator_ StoreIterator
     The store iterator to write to shared memory. More...
     
    typedef LoadIterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef LoadIterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef LoadIterator::Pointer Pointer
     The pointer. More...
     
    typedef LoadIterator::Index Index
     The index. More...
     
    typedef StoreIterator::SharedStorage SharedStoreStorage
     The amount of storage in shared memory needed to store the tile. More...
     
    + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStream (typename Base::Params const &params, typename Base::SharedStorage &shared_storage, Coord< 3 > const &bounds, Coord< 3 > const &block)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    CUTLASS_DEVICE GlobalLoadStreamBase (Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator load_iterator
     The iterator. More...
     
    FetchedFragment fetched_fragment
     The fragment to fetch from shared memory. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment to convert the data after it has been fetched from shared memory. More...
     
    StoreIterator store_iterator
     The store iterator. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    static MatrixLayout::Kind const kLayout = LoadIterator::kLayout
     Make sure the transformed fragment is the same as the store fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ = Copy<typename LoadIterator_::Fragment>>
    + + + + +
    typedef GlobalLoadStreamBase<LoadIterator_, StoreIterator_, Transformer_> cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStream()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ = Copy<typename LoadIterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ >::GlobalLoadStream (typename Base::Params const & params,
    typename Base::SharedStorageshared_storage,
    Coord< 3 > const & bounds,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStream.png new file mode 100644 index 0000000000000000000000000000000000000000..4959cca992b07d0798e4c38a2078f11b7173156c GIT binary patch literal 1505 zcmcJPX;70_6o$V5k+4Lth=?M{K0w(-nqV4P1VX|pTL393Ta+yjS(+lW5V2I4fCgEt z0wNgpMMV=q0+9iMLd77UELm6z7%&lK6B-(9r_-7K?wL8~y!Xzz_xyS0z3SzGg32F} z2LJ$yc5(Cu0FX$cy&%$(`ci#7Ptp#1x%oQn?(RyYXl;@9Ecu#5ea^0~u7LX_f5{P& z&%Ec1TX_ zp<--NWT4cq557FzrGo~cS2L7@sT$fc#d;_ROTYhsLjw9@Rwgg%54|OnOMWNC^uy_> z$PO&7xN0A_x{R2Dk6!FoegCTGR)gR&%`^3F2ldrVCAs+^c+{0lAJ?yC5#6Zx%`9G` zT730OKW7)Vdju=7j<&WD5@u5o8>G4|5YC|HvBac>3Cgka9tQ|lC)#r> zp+xSdb%O%)A?@(k#kQf5o3*RnQG-nw@66>H(JIt`HdO_FX=4yj$QsITL51{% znqyL^%6_4+Fl$O{AU%4trtz0dEa@CF9bX5HSehYul$#l#Ro^`<(l(zp)VF5nAZIPq z(v%`c*e_bGQrE%px2|Xtz_#Ovd}Lu<<6{ote6CyhO=F*h#XL_rjyXaw1NGM>sZ$By zkM8IPT+Q5C9*uqK%o7~JQ|^apHlibSE-bwvAuNDjdBERq_{z!N^#-MmzKPno--+Gw z&F$|Sp2=D@+TLhB*H#+*9iHSVx{ei=@`cJfvJK%ZoJF#AH$AHTU7S7KK%_~H4wz3j zf-=*%8b}5c7OV0=@DpdwExD$YUr1l6e@@#!y2Zo#vS1 z6FQ$E$gXUYl4}@tC*{T0Vwgj!r-*1yt@=?drr`BtzFvo{Rw&X%UzIx~FFcF(wxfz8 zrs9{V2d`Ci%4;+9Y#bnF=sabzD^)j(N1cNxv5Nl!%naatfdfE*7K8=b;R#F*$|hV^ z*632wOwFDFLX^qU42M1_526jvkURt=KS;wJAp8HP3j)~F=$;+MXbX@#4)8$#S?Vb# zhOg$e{u_DOk}zjjYf6u8@~nXXw&1KV+?}Mt3*5QOGjXzCqw-dgGmK7aZ%$Drqml{_ z0P!pr(e2)Rm3;1{tMd^Ljj+qjow-_tFM3{<#=uf;+n64st#aBPo8Bl!+%Fr6bK9Ok zziyCPH<_w2=i-qhlRDkuZ49%#9H9@cv4CHYf!9}v4eiB);)$x%Y3zHTD?y*@tBFAtjKIEMxPz7F-0X{=?a_Q3|d9D}jIi z5%;QFH1@q1>A*J2vXSbeay1F%j}&9nbzM#}bdI@*b9hQmE;`mT2mVTcBrvx^CfBlh9zY{~ntHK&wB3zT#8f+JPF$qg4b<;QbNY?fz zN&5%!wE}gf`{Wwu(4BB8A-`^4ASe6e=4yK4N8Tq+3v{6AXljAgXvL}aJe1Hp!AQnu|nmvscJw>XmAgU zw*-VNRDhCqh9xKLQ5m$-}Y9;al>bYox}J literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html new file mode 100644 index 0000000000..372751d8ce --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase-members.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + +
    commit()cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    copy()cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    fetched_fragmentcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    FetchedFragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Fragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    GlobalLoadStreamBase(Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    Index typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    kLayoutcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >static
    load_iteratorcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    LoadIterator typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Pointer typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    residue(Index k, bool skip_clear=false)cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >inline
    Scalar typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    SharedStoreStorage typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    store_iteratorcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    StoreIterator typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformed_fragmentcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    TransformedFragment typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    Transformer typedefcutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    transformercutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html new file mode 100644 index 0000000000..8741f1ba74 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.html @@ -0,0 +1,602 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    +
    +Inheritance diagram for cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >:
    +
    +
    + + +cutlass::gemm::GlobalLoadStream< LoadIterator_, StoreIterator_, Transformer_ > + +
    + + + + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    union  SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef LoadIterator_ LoadIterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef StoreIterator_ StoreIterator
     The store iterator to write to shared memory. More...
     
    typedef LoadIterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef LoadIterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef LoadIterator::Pointer Pointer
     The pointer. More...
     
    typedef LoadIterator::Index Index
     The index. More...
     
    typedef StoreIterator::SharedStorage SharedStoreStorage
     The amount of storage in shared memory needed to store the tile. More...
     
    + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalLoadStreamBase (Params const &params, SharedStorage &shared_storage, Coord< 3 > const bounds, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void residue (Index k, bool skip_clear=false)
     Execute the residue code. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Attributes

    LoadIterator load_iterator
     The iterator. More...
     
    FetchedFragment fetched_fragment
     The fragment to fetch from shared memory. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment to convert the data after it has been fetched from shared memory. More...
     
    StoreIterator store_iterator
     The store iterator. More...
     
    + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = LoadIterator::kLayout
     Make sure the transformed fragment is the same as the store fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ FetchedFragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Fragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::FetchedFragment
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef TransformedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Fragment
    +
    +

    The output fragment.

    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Index cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Index
    +
    + +
    +
    + +

    ◆ LoadIterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::LoadIterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Pointer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef LoadIterator::Scalar cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStoreStorage

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef StoreIterator::SharedStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStoreStorage
    +
    + +
    +
    + +

    ◆ StoreIterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef StoreIterator_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::StoreIterator
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    typedef Transformer_ cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalLoadStreamBase()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::GlobalLoadStreamBase (Params const & params,
    SharedStorageshared_storage,
    Coord< 3 > const bounds,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ residue()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::residue (Index k,
    bool skip_clear = false 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ fetched_fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    FetchedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::fetched_fragment
    +
    + +
    +
    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::kLayout = LoadIterator::kLayout
    +
    +static
    +
    +

    The layout.

    + +
    +
    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    StoreIterator cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::store_iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    TransformedFragment cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    Transformer cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase.png new file mode 100644 index 0000000000000000000000000000000000000000..656870897cff1220f04dd328aed02edd07299037 GIT binary patch literal 1507 zcmb`{eKga190%~9;hG^s+!(tY4`puF^$;P1tkJ<`HqTFmC^qu2JEj`dO?kM#M2b+j z&A~(>vCt)r7HXW)n8$LBjpH$DcirwE_x^vs=X}23_xF1~pZ{Lxd(F!OqXg4}0RW(c zJ>lXF08%1J_JT@F_94yDT*m=^DSS*%A(fY^g?&N$)`nx(iJN+J!@scYv$=kyR zkTiW=92U+D0Av%fF2`_WsijhG{EVBTi)YcrUGe@FIJvNl5lNjUs}# zn{mFko4q3h3XmP4d-za2>x=qXd>|K2leX@56uq3bRnv=Gy_g)-Yej-Hf3`tx1_LLg!O>lduD?w9=q~bGlu^E55I0(qFTfDOzXo zQakFVBSpj}WVtK88yMxUKecmZQPTdD1qGp}-sjkpN$(Ao#VxBF>9kVtPA;DO`IqHc_^rRI=xn8I=o+W6j zusY(S6B|vQi`)705#4!{+V4llFRC`0;)^bH1~jh)SKY4~-|10YCXg|tiv-dpvXk*L z8Y$lIMDY|Qpdnl0h&a`>9VxQ1eK#c^(v$l3WFAZx5TR#yTo@eK!l+1$IQ<)q2yRzZ z05Ptbg#e~2vT#6j39j9#%w#$x09Md0sa<|W>VZ9cS&1=|toPrH{b&9wy8r!wj~7My zYvgSO6ho z;j#FYc_X*L1mbmlW>t`3HKb-#kD2H>AG9> zp8u4jkF6n-}NnW%~#*ffiUU$P=Xl09~j@t9t{TPHm zA!Jae`Cc?(q1Q!}!S%O_Z_w)Mk4|lBC%S>3+ra0#t@OuiujRt=^R&>GHM>U?PF>}0 zj!C*jbq%a7zqj?LMN4#J3hF5|6~t9o8oo+r5Vpc5)~~K6ySMl(u@!~#6Qe@5;)Sbx zhcF$5FEo1DfGBRPe)9>SqefOHSoW%izakf@_GCV!1Ed4IR#+gfg&l zGN&0UIOqyaWac#>D?y9iK-BKo4Fg$&1>#Kb!C(*quI1^=6d7QkU52kzoJiQjEO-#8 mJ*BtR_@jH6e_@a87isgDDB|s#@leT+3}BCYxYRp^(f$IgQO-O7 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html new file mode 100644 index 0000000000..f2125003c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html new file mode 100644 index 0000000000..31c214b200 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1Params.html @@ -0,0 +1,185 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld)
     Setup the params. More...
     
    + + + + + +

    +Public Attributes

    LoadIterator::Params load_iterator
     
    StoreIterator::Params store_iterator
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::initialize (Pointer pointer,
    Index ld 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator::Params cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    StoreIterator::Params cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::Params::store_iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html new file mode 100644 index 0000000000..1c76028253 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    commit()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    copy()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    fragment()cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    Fragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    GlobalStoreStream(Params const &params, SharedStorage &shared_storage, Index m, Index n, Index k, Coord< 3 > const &block)cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    inc(Index predicate_inc, Index pointer_inc)cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >inline
    Index typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    input_fragmentcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    InputFragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    iteratorcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Iterator typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Pointer typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Scalar typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    SharedStorage typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    transformed_fragmentcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    TransformedFragment typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    transformercutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    Transformer typedefcutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html new file mode 100644 index 0000000000..3913ca1d0e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream.html @@ -0,0 +1,562 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The store iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef Transformer::InputFragment InputFragment
     The input fragment. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef InputFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Scalar Scalar
     The scalar type of the iterator. More...
     
    typedef Iterator::Pointer Pointer
     The pointer. More...
     
    typedef Iterator::Index Index
     The index. More...
     
    typedef Iterator::SharedStorage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE GlobalStoreStream (Params const &params, SharedStorage &shared_storage, Index m, Index n, Index k, Coord< 3 > const &block)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copy from the fragment to shared memory. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    CUTLASS_DEVICE void inc (Index predicate_inc, Index pointer_inc)
     Increment the iterator. More...
     
    CUTLASS_DEVICE Fragmentfragment ()
     The fragment. More...
     
    + + + + + + + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    InputFragment input_fragment
     The input fragment. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment containing the transformed data before the copy into shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The input fragment.

    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Index cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Index
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::InputFragment
    +
    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Pointer cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Scalar cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::SharedStorage cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ GlobalStoreStream()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::GlobalStoreStream (Params const & params,
    SharedStorageshared_storage,
    Index m,
    Index n,
    Index k,
    Coord< 3 > const & block 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ fragment()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE Fragment& cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::fragment ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::inc (Index predicate_inc,
    Index pointer_inc 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ input_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    InputFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::input_fragment
    +
    + +
    +
    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    TransformedFragment cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html new file mode 100644 index 0000000000..242c1fb8c8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html new file mode 100644 index 0000000000..d859deef66 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1GlobalStoreStream_1_1Params.html @@ -0,0 +1,168 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_global_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld)
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params::initialize (Pointer pointer,
    Index ld 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::GlobalStoreStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html new file mode 100644 index 0000000000..07ccbe6df3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kStagescutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kThreadscutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    Warps typedefcutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html new file mode 100644 index 0000000000..b5879328f2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    typedef half ScalarA
     The scalar for A. More...
     
    typedef half ScalarB
     The scalar for B. More...
     
    typedef half ScalarC
     The scalar for C. More...
     
    typedef half ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< half, half, half, half, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, half, half, half >, kScalarsPerLdgA_, kScalarsPerLdgA_, 8, kScalarsPerLdgB_, kScalarsPerLdgB_, 8, 2, 8, 2, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmConfig.png new file mode 100644 index 0000000000000000000000000000000000000000..cf90457b12c8073e9e18c92e0ed62ac6a8973694 GIT binary patch literal 2911 zcmd5;dpy&7AE(ZgxPqKzZO+8i#kBOzfCYEvoH&ol**D)X{Ob7Zv$5AT89;{&NKroL_ zVfI1q>Kwfdi~ieNd;)SrYW(IsTMjB-4_S|``r~)@Bpox6RX~(rg37+!!H#w=d?iVi z7$ZnuzHXmOQh-{(swSgOEnCOR)YV(gA2m(PqAwDMBC9a5aw# zh5z9rxgt#!)8iAP3#ADj`92D8G7T4k$2wCGKxF011kegW2KqdB{&~uEyyDruN__;) zG8%F1B(c8U$vz=|sAnG%9Ar2B1SAv=gzT&1uIP)T%g-_e=#$DAVOskLBEeCX(b2b z?vGTNvrYEJ$DgvGGJns{Xq+_CTJT^R3<(KGusVJ|{uig_vTj=)afkwop3Kw_2yIvBZrumRf3;(^wcMn7gLepVR(F%?e{5a6>0M~O8+r0;M$Bo?2<}a$HWAr3eP^y|q{LBT1JEn< ztkuM0*)!(qOO4s>=Dk-My{^CC7JH$e2RC27eENB%t3EV|;UpfDdpEr#4j%sWMY<7p zW0&L026YFxoZ(tpi(WU~c1LW)t#(+G;u(6QvcsMkqQz2zzR;V3f6QEPvHCn@x4(vA zJJ;9b1tJ_pIR_%R69pp{I|&V$o6oaq2uMH094is=y>D7eoV`CR+ACwZgcZ*(TO1-_ zg6u<>xUKn7HHU4DdD>=sT5UQ{M5dbw!tUaHdNj0b8~T3PyD+tq@9wSSiK$Ha=|gBk zTOUacC6>5RY6NDcABEeMK?DolPJl9NG;#K^G;OhtHm-WegoTG@xq32E-%~8aDE$S*sHzij@H&@*lazd z*lsfaWAWpw*m>o6mnt@Y;-y|}@SEk=x`pP?sFPz&A$qe`G(<}oELyL?=Vo5vG#LJ5 zti0Mhn`hOs{{m3~G(3T;V!F8&Ar2U~YP*j~m*~dhZK)%-rN6&#T}IyJzy5j=O4X z^cG^XJ~Ju?u);9EB1_*#=H)FBjJ1Anu^~>i7^3onF%=QDy+&`@dE>a3X<%`uKd&`^ z{O6-6R@;T%FK_Hro(C-3yEl~)`gPk>VobPK_(VDh$va7b&MBp3?C%cvSOL!3qA{@S zDJuzV)NBjQo+x9x{v+7*!@{o5t|mvXLQZC_%I>APjtO?G&Lc25aVgqrL9cr+VK>J_ z4r$OhH5y>xDX2yDkc}AQlLQG$4IZ~W^PLB@MHtv6fE_{6u|12*iYrAxNv=O;&zqy? 
zFcS$Pk4UO66tn6m3DL{bAyKv{{i%Ee!L6Eo09~y_XMZEtbsimr0QA05FHEN!lJlK+ z0(6@49Wev{lbDSFG=cRlfi>+fYufcgG=cA%4Ryewexm zcqDCAa$t+S)Z|8%&q0?!{K> zpt=^bNjd@)q=3g{tK5JaFz(=QFw#o?Ut!3kZc`WcLoWRuXZlTd_M5u=wV~cH)m`5z zFx&gTE8Xrs)@)G_+-y5m!95dz>J@xtj<9Of&D)7Qg2blemAb$;T`8)ivz`8%&ZUgL zw_0yG1MC@cU3f7yX0Oxc^3*JMV0u|+H;{NwEfkbq8>KpmSpIqr$e$*K0FkBg*WRC> zRgY6sWI=j+aotp+MzrQ04l=vR`DEdKGn3iCclXnz>%FrLS!_}aYmc+R2eeXLo=~RqYWW(> zX@7Yn3cz}Wa+PE3+4UTxV3!9oPA+8}o{1?|#aFKGE8smkO2sPjv)|n!U5$03v=>A2TozhoKTnAi_2Y9Xz@E- z6_BSi9R-Gm-t^Ztf;5%1?}RGOdDXos zTHvQ;aHE=Xv{Qrhpy(+zARzF|ug?d^hs6YSo$vV3cxCHp%IxNXx{MD~f~07}EuSSb z@u86+F_R%fQTVh)xTu0FL}@=|vb=|8(#=N)e(`4yKsv}|l3?n*24FeZ7hZvPc z9B)d*lZv-#9d(Cr_!n!1V_RG27HVSsyOzU_%i7(68Ly-fny;a7uZ)Jpo~c@`T&1(u z=0(c4Zf!zts<;<)@5AllBlnYUh?f(6c=tLyP}oj F{|mW{rD*^F literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html new file mode 100644 index 0000000000..f1a365e8e4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSize cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kLayout cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kMemorySpace cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    kOperand cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > static
    MultiplicandTraits typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedef cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedef cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html new file mode 100644 index 0000000000..eca01a3cb6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.html @@ -0,0 +1,254 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Base::Threads Threads
     The threads. More...
     
    typedef Shape< 1, 2, Base::Tile::kC > ThreadsDelta
     The threads strides. More...
     
    typedef Shape< Base::Threads::kH *2, 1, Base::Threads::kW, Base::kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< Base::Tile::kH/Base::Threads::kH/2, 2, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Threads::kH * 2, 1, Base::Threads::kW, Base::kAccessSize> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Tile::kH / Base::Threads::kH / 2, 2, Base::Tile::kW / Base::Threads::kW, Base::Tile::kC / Base::kAccessSize> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<1, 2, Base::Tile::kC> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..5af8a71018b08e853091ac0ba7f2ce053855e221 GIT binary patch literal 2175 zcmchZdo&wAIp-p_YEDK2n3 zker$v001Bu^c(^Jq=^#SW#=}@dRYwLDcMwA9Nld;H#a5b{PGv_>xfK=wKW+G#)X&B z9+KnE>j=0TAPM?0`ro?iOI(VubJp(BQoI5lVe+t|jncsR#f^+t7AblJAc0gL_3;Gr z3rWQ%QD*xC`ajj$W$oz_Rz z(k4`^X`riKIT9LeR@aWgaFH3@&+G5rjhAPnl~~rRgyEwPzP;ok{L;2Z^TGJUG%oGi z{i*lgp&xFo&c|ittKZkccUkpwga2g3JZry5^zieP%OrJd%$sHV_i>{a-qqU1ID|)P zmOEhlQbf((gzH4n(6y+E^QEI>qXQ%H9>~QFI%oVoF+{~G3a3QDQ{Obp(f2)ETNN(o z@t4mMs6sl~#VR4cc}k}%k$M{r(EwVvaF2#tE{L`pVB&Ile!DwNj;5 zKdbXCD2JnEdS~(0;jqynuQL?oEj};a`+W}3<{E(gOI;MNK0W1+;(+$Zdrp8 zPoD8X+a6qYtbe3h7$lyb$F0R#^auWejv$>Fw5+d4EgC0wvc-b(1tSI%PUKc8(gP&U<_SNPX z<)Ny`+(7yZrzS4wAl)<2LgluW>5o@s2Uk&tpTE9Hok@a}3BP~!{Y}*_!n0uPL*N~V zUs3eponKF&aa@xDF3ed;s?}a2{qfw4wN6!_PUm0=g_ukkGL%)|Z;sUI!VbQv8*IH^ zK*;D?jZ1EffmNTj*B3!Z7MX|u14ME2OJZ1$jFYHri_NOoexh}ci>0V$!? zxoy3v?k7Su&@{l(*3FV|Q+W4(;p0EVpWT-ZPMcPp(ZL|O8=txMw#nqqQT@g@MTM}pVraj?cl2* zOydnp?~3&NVAk6?dnq7meY{FJ;R128_0+W5XAq=5YoORzU_6%G&LLw6@%?$i8X<;S z)Ia#jiXTWP@eEILc$rNZ6VG(Oi=4FUEW4kMfAUV8oqjoL9{ixVmhAK>egl#;sLqhc z2rsfpm}uZL{|2S9Jux8L*_s$T~0y_T@h5*QhpV_|9!b^H~$d4+Dq8R z4D}(#Gyh>0TK+y(M-FH|7Q*lAJo!v9H3o zm-`-H>SWR^YAUKc_5ALUJ+pQl$7p|s?(ub2($6(AVEk8F9(<%c0e)8D8-QQN zL(o;r(ht2LeM;2T5=zDr0FgI|^-vdgMP3EK?DMx98u^0Jg|86}80t*fibZxUb@4RL z%A2+D#<1C%r3bhBBXEsdft*fXL8)RJG`ry*v;m$u_wiD2?zIt+1?i(1`GnbJ!Q@9; z^m`_045Y@jSPV;JW3Txl!F3}iYiz{ZPzDbrFMLs0t0c{Y3hg0NbTSp zSZZK>KUMG + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..b9fb35edc8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmCrosswiseGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <hgemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::HgemmCrosswiseGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html new file mode 100644 index 0000000000..1abd2b9094 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html new file mode 100644 index 0000000000..495f144598 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmSwizzle.html @@ -0,0 +1,273 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmSwizzle< GlobalIterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_swizzle.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalIterator_ GlobalIterator
     The global iterator. More...
     
    typedef GlobalIterator::Fragment Fragment
     The source fragment. More...
     
    typedef GlobalIterator::FragmentShape FragmentShape
     The shape of the source fragment. More...
     
    typedef Fragment InputFragment
     The input fragment. More...
     
    typedef Fragment OutputFragment
     The output fragment. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE HgemmSwizzle ()
     The src/dst must be half fragments. More...
     
    CUTLASS_DEVICE void transform (Fragment const &src, Fragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::FragmentShape cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ GlobalIterator

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator_ cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::GlobalIterator
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ HgemmSwizzle()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::HgemmSwizzle ()
    +
    +inline
    +
    +

    The number of elements must be a multiple of 2. Ctor.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::HgemmSwizzle< GlobalIterator_ >::transform (Fragment const & src,
    Fragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html new file mode 100644 index 0000000000..6d21685c97 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperA< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA.png new file mode 100644 index 0000000000000000000000000000000000000000..9d9069830ae96ab7b2dfb11a4fe6e8d2422af88a GIT binary patch literal 1417 zcmb`HX;4#V6ozkt2(r{xtlpqv+X-M0!W9rfz?7W;i4sIMaSQt*0s=v>L8w?HDg*&# ztFnnOM1cT?C|u$QWep-?OxP4i1jMMwl0-sd`>((I&75=IGxMG~b7r3TGLI0QHPy7$ z007X$yEu>l06|meBdSV@Y!lgVOHsfh?nfQh*Vh&D9N?_#19M$=VTMo$PiG zuXjch^h$At=5%aMM31=CY!D_8g3Pj-;`!@fdBdpilzRCFa8 zGmi*){S-FgNn!}oxjKT948=bFc48JjXqED2z{l!L+fs{cNzQQ3O@_A}n=NzIzVQ~< z6k)>S)Cz`M11d#4)02Arh>=&0?E8Z8g7JvGmHln(N8D+W-fWhU_!jz_S4Vb*Hxt){ zUdv-RHi*}{4XK+trw`IAjNg&aWRbIEr%o{bAjUH%ul+ z@M`#>kgX89I}@3Kw)B9r(fVp2ecIsws9@!8kZ=A2^-0{|=3iAOWojUB=*yy(8E|~h zDPVQ*n&Tkw@lGmY(&-Ws&O~lZRX_wNh5_PM2vAT3Sa`s|xiW>p0ipDN801;d|t2^M6-{BBz!=AFPPam`WOwdK(o^^rwZ3N*O~`iyb3@A^d8<;F{6MS3vj~ zcnbPX z`@r>;kjxi_xRkhJL1d67OpCwm!C%t@UP9f3Sd`E7hCU+j#bCnKDZB5deJG)f+%b2h zd*%0}Z^W_=PrXAj)@ljOAsA2Xdt=ssRZUNE<+$IM)8=ats^w}QBxMigH?gFBHZpm= zFNVP`Ra^DJTdSZn|%0qmmC_uOjR!@&_eI zre>doc~ni|uylLZve^jaw_=)CU(mY)a!p}Fb3*GAp(D0VkWfenVtNx7>(}@3ILpCd zg|;OR{$=)tmy;k_rD>J+#-XTi6qP*}Gh8*!Hs$9I?|*CqTAsM0je2jrJ-6p$jBtR5 zrN-@!5?`fa + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..9e370edb33 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, 2, 128/sizeof(half)/GlobalTileTraits::Threads::kW/2 > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^T. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsA/GlobalTileTraits::Threads::kW *kScalarsIn4BSharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^T. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^T. More...
     
    + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kA, MatrixLayout::kRowMajor, half const, Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgA> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileATraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, 2, 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> cutlass::gemm::HgemmTileTraitsHelperA< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperA_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..389d73d1ff91b89a156ec8ff1d8ca0be4bb82370 GIT binary patch literal 1654 zcmb`IdpOf;9Ke5jlxrazX1Ua%s74-LFtJmo%)&5^lr)sfEV|f`46UTx#^e^G3zb~b zDMn*kIVr`M+gftjQ6`&-HMF^$oz8jAKmB{&=Xt;1&-*^#=llNi`9AM;FBD?EIz$}+ zfb~e%gJ=K%2^8$5s-%c|I1f`r0(-gp9$H;pRp1xtV!90h&fEEdcEalD`6sES9U zd;mq!-)rEFuPp#5uSFi*?;8)Art&GmCiN@uhHADPK1aQ7Ww`lNxNV2{gz>$8-NGz@ zpt1P2F)AQrs4)s#k3=7!=(|iokwP;~45hl~;jFJSIrXz`@OoXR}R( zMqX#VLctq7%s)fCw?0<3E;3JTy1%V4{@87lV7XmnpDv1zGspRe88nh;N1wCD^pu+W z)A>(w!tmnqkj~Qya_gSwZ2=jZJD_gM{IjEhS=~+rnYoYv4G&Tq4=W%MuDt&!#;QZv zqH;@Ib0;hTDPnV;nghUZt!YER&h{dc-pZB*JE#I^ycmd9`b? zB|y4Ob+8h=_nIMfB;?elCeAJ@UAJs!Vs(rCx1xdLb&gabujqok{j}?D!)MmRKRp`? zdG<0s{{wmR6Sjb7HA!mTxCr&Q%;u2IpTH{55bDCghK_aKU5{L2n?bR`;RNS91moe; zcp^O}av?xQYP6AL=R%n zGqo-ON$Cg}*=p0&P@tI!K~pyGFx?hp2kVYVh;x#kGx)&Sl`m&Ry(&*95x6u=N%u;u zOhfx~;u;30*`&SUPs;=_wEvsh&8u&}$`VHqe>g7*S6|U5?(J}~~YdTyDGH^PYRLWnTgg&B ztPp+p&JA>d^4d+HdYd1fIC)3HeLE^-UTj+MnP*x@5bfOF$je*|;r({wVFKeI_41CK z=|Gzl6I}zEz9RvMSu((De!gb6;x8CWRQc`dW6qkff2IZe%eCSR#|#U#Xg&CO-Z@|H ztoU{B#CzhI{C9;F!DE9hWvn$(7zT!$HOjQ|#I3vwknC$|0H+)+khSp{bjYjH%ym3U zgpLs#IhUpTpj(8zX zhlh)_PI1D6=m$_C=+o_j;fJ34>#bkb@9*@N>YFJ&&oaS_vtt=x`OMeXvze`S%pP80 zR&iRJ@uchS>&l2mm6?o2xG2l%H9Ph_6D@P63M=^C>Fly_J2KAzz&0Cd{sYH)H4OqF))L}uA zcKf9;t3ACJqQz4@a@@kXMAbE4eD?KTI1Tb16+>np#Pra*v8Kv&&IOmgp($Q>^Vs<^ zhkYEHg~aeMQ!JauNq87^GU#V2nPms#wl(<~9n!j-3Zn__?_-Dkp( zrW_`%0cJbr$UHnnbLU{pNA)0*=}o02BU6o8WHD&uiW>zpGX$umtg8Ir`l~{AG^lYJWmEw~CkcUtQ>kb5w{sIC + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperB< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB.png new file mode 100644 index 0000000000000000000000000000000000000000..87ca082aca7d2e53bb344e53256223f0544cc972 GIT binary patch literal 1416 zcmb`HX;9L67{~uwu17iU)&^QE)<#-nIf++ebSWulL_`58-sXjvo7X04Ynin;QfjGe zuB#ZjxfbA&k_TyM;+>SOh?TeCwH}Me5=*^o-|jQ>{GRVKzwgZRy!gy>8;5l_HQ87XhW!YISmPb(+%>cSol_r%)&)@e(iSTBBGz z_8g!yeTvs&f*k-DfYELz2#I=gg>zZ0dpD!ZoL)6(cP~^ML^Ner&tMz(GwX8Tyyuht z%Xv}K+P>N->7d98y$ik0PfLfXaNIjEOA}D`xa;$i+4QjP?`0V<3VvOaQ<_dIiU!5k zEYY@gkF_tW=^taV`p;!^SS&~>zdEth8m(S`N**qITU17`xqx62$zDe(fiVxn!=oz) z9c`clOL2xn^D|zhcE^y&njLwjPE%98-y;9DJWr|^O-yG^MB%c;pC&9#=4Eye3S$WzGSryyEcjAf}z|89wRZ(u`nQx=^~as2HLw z=OkH&L)?+_Ms;9ALff+>QHif(_N+5j6l|B=aM^EM@y-XWq7-uMuR$(E6=PUTd2_YG z?&XIXq?|`lCWk2`UKjQ4Cva8@+xnpnOxVD9hKt$@9+%CNh;O1_`Iv4(Uqz|UdA{o3?}^3-_JFF4^$?RF zYo}sb>vvPGs@%y0r}EX*>-B5Fv2*>7WJ^}w=B|>}&bhdl24v)eH~-9gk9Ji}44(p7 zVSZVyGdzy)qA>_opgk#7-vDg+Q`Xlogon*dRf->16|jU17~z&%geXHGuENUH82jb? 
zOd>hKOqj*7Ar>e>LAjJ=7C$U4smZ+Js@`Md552#pQkz;oGn-K_Jug0N86hB?e#h+P zn-fp_w;u`-X3ppSK^iBt(M?vikn~*y&^RiMEYTojozC&dM+K7g9h`15mAo<%Di5^1 z=R}ee%0l`bCsija{yr4Mb6k*@G}8pkF=hwIan&I<&5^Ra1Y?|sTUIcd7O?QYELv|v zYwB!w){K?AHzxRHDti(Wo{%J+UvXP~-Hh7;5CunjhQX>4r-ZX9M2+XfRAR&ej0fpm zRCYbv9MLfZUA?u^WENG=LVG^oK*Yfs3ekmZ93?PpE!0%|mEo-|JS(Xih2@RWKxE^z za^4P@iVaUso;vfDHXOuoG4khtyY)u;?rocFQjY$K6a%&|Kam?fD5AozrTf|&CQ`un z*RD@vsV@wRH{1Y52wy^`;d|_Rh{Nnl_SOQ|)6Ujh*;?GAJDUI+g>`#;(m(BA574k> literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..273311c271 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..8a59bc287c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,211 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, 2, 128/sizeof(half)/GlobalTileTraits::Threads::kW/2 > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD >, Shape< 1, GemmConfig_::kThreads/GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^N. More...
     
    typedef GemmSharedStoreWithSkewTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB, 128/sizeof(MultiplyAddScalar)/GemmConfig_::kScalarsPerStsB/GlobalTileTraits::Threads::kW *kScalarsIn4BSharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^N. More...
     
    + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    static int const kScalarsIn4B = sizeof(MultiplyAddScalar) > 4 ? 1 : 4 / sizeof(MultiplyAddScalar)
     The number of scalars in 4B. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef HgemmCrosswiseGlobalTileTraits< GemmOperand::kB, MatrixLayout::kColumnMajor, half const, Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>, Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>, GemmConfig_::kScalarsPerLdgB> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedLoadTileBTraits< half const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, 8, SharedStoreTileTraits::kSkew> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreWithSkewTileAbTraits< half, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH * GemmConfig_::InstructionShape::kD>, typename GlobalTileTraits::Threads, 2, 128 / sizeof(half) / GlobalTileTraits::Threads::kW / 2> cutlass::gemm::HgemmTileTraitsHelperB< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTileTraitsHelperB_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..c90cbb130f90839e77b475beac0e942e93e1ea63 GIT binary patch literal 1683 zcmcIle>Bqz6rT#)qIJad%8)m-RO=U3!z86ie#_4&R7Yz(Ph>G)btL*RrH~|*B|Umf znhKjUW$9tP2(`AE)%++n8NTi3+k5@l-|wDt@BQ2#=iK|p=bl>_=IiAg08< z-oX%v0jS4;(8YSo=_@;}cPj(^LU-$QIz5!njn{C}ujtW#+R@Pw!N@$I51}W6{X-!7 zrhnz#>(DI_h>;!9duM2-!Bi#xcL~nK8xb8gtE*}&-0f=%^JQ@RrjaATJRCCltI?To zCYf)>XObBm4n!F7LY{eS@glo(W%S&=Vtb5ja3&F6@I?N3X7Ag$yV+E9(iz#12NpLDKgzl(3}V#BeTkuu$Qlw2xPtS1c<|@Xs_x;mB`3h7gb2v+Ab#c~> z@Dw}%GLFlN>?(^Wx>tN^IrlMX2nO65Pke!cyG^5~nr~oIZ|2ymL)j>0LQ?qU!}Wto0gNE5uSpPyw)Si6E4!{AX2~kA zDYYLq%(=$ILJ68FdW(hPYwR~^VtT=qlOC0d7EZX*Y8ff+iSj~X)nQcJaHJT)=rP+w z>#UWXjRN{TCA)XXD(o|mIVKuwaiwCPxPg+j6ZB`}#j485v1n7tG=$VD?m~2TecuYC zq9+`DM30UldYk;Kg??!PU~3Q?Dg0%QC3g}EsS<9<1WNq1iytH+f3)l632Ff{7m?bP zFV9O^F&YJCl%|ID#8`BD{3RB{b&G}4daMkj*17p*aG6Q8P2`n>O|<=VJKc75Sw<)9 zI7L(2us|<4{Cw2p1ALohyW#5bZLHR4teV09O3k>zxQS}$9b8*Xtzn(uHaYWGr}6l= zodrET=2zm1 zSoTa^L9yZs7>Z%nwW8Z$EtBeZwPo}?v_^M@voF|_E-yA;j%h<1dc(0qgBg1@lN=q- zXKpZua7GcHi)@j>nYo$ZtYe8W^t`#Xtu^apz-NPdYt9)%3$LSW^$gC{Q`tbzX=7-C zzW)Ctyq3oKVhV$O3k~7=mCSw`isS$qaKV(}^Ry^rKA@~w!(4a#($ul?AJ=1bsIZ&K zYo;~&Bi?jAv~Da)VWhztyMUdUWy?e}k>bkm63njQPpY%>T{TXdxeCa}wn>9o5J^b< z*nA>gseCl=H|3~G84BrrFi!-MF5_CHQ%f8sW~8byDX7r}J#Yp<<_=QxY35QfsZH>` zd0}litSS73m2&-qu=E|23H&`WBdzDED|gh5*ZYzny!`nzV2-ny)n08>#SepLD30pX zd)L;mX%9z)?Cm#k(if#}34(51&}iCQ;+uBs0MZmU(8od>82PtbRL^zXegceOk$heo z(I1vzlBE=?y}+5H&Zq~&)Y6QrHAzQq$g;-hEOy2|jcJ)BCjh`tkPY3tzpxha7PO0d zwkmYK;OmNAvEyPZ!8K#Jd?}WO7JM477_6_3984OhNScNLu9HI?e6`Rr z=Ov$?tbXd${(3}F_m-%Pz2uUip=;SJq z;E|;^B6Z#aza+AkHguF^-pCs$n~Go_t=3-E@BZUAdJFTx!_ZaZL*0NkT9=;iqz4#l>?& tKJK22Mu5Vf_>L9t!`KJ;V(!Wj6(z^s{}x9c(0>pJakszspI(Q~{sRIEQWgLJ 
literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html new file mode 100644 index 0000000000..2a51feb222 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Epilogue typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GemmConfig typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Index typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    kLayoutAcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    kLayoutBcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    OutputTile typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarC typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarD typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html new file mode 100644 index 0000000000..a9e4c26963 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::HgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    typedef Helper_::GemmConfig GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Helper_::Epilogue Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Helper_::ClearAccumulators ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..03fc4145b8f33ae36c28b48e33f0e9b0ff602468 GIT binary patch literal 3190 zcmdUwc{p3?8o-mHRH>;dQ>`wRqPC8;4MrELg3zc2C2`dnDlMTXVrZ$_T8)?>mMKyt zmfETaU9@&3qP2!WDv71m2qHIn+nHzny!XHRJiJUd&i=S<+uA1?9JihvMb*Au5&?CO~?P_jrcBlxs#y3UK zP>3CXAGEWm?N_w{05Ks8)ALtDfOCXF9P_25Y3&=|RUT%Ew9C);a;S=!x$_@RnHMwB z5b%9CzR*XTqHFD$QP^fh(Z%~@oYb+V=n7h;x+}I_PJOJXU~wsRcQZ(XFA^0GS!k!~ z?i=voi~l^%zg-To{2rC4m=qJ_^gYU5(dP&VMDVvv&6ekGSwQ1xIlc+-sm-^m#EXo% zcMDEiMhV%!KEAL(B3+`56(iE~AGEuTIA?0`ET6|+Na`OpI2mphQydd|Vc*f0y5misbolvyv)lC|ZJ+^(mKn&NBP8-^wN`6fYg&BFWWrX{>b_N;&4x89$;BMCXQ! zn$^C5y@=K5%C2wRUTgLCfx3Le;SzDI*u*S|-iw3)*}~d|%j-ho@a~Ig)@$WQvOQzx zAHJ9Y7lqo(g_4RDMHdrRQxZ9D<`QRbW*u4kkdplzr&} zLmOVuP(sp(c3+}zZI#+S9Acj(LZ@E7qk;T%Q6NFzw7wc+nRobUEfD{KQ-9}`!`)B84p~h)QIWY888`U*xjI1-ECVPZ@d-l=F99SzweYkIL z8ndV+zP0Y5QN+0+4acV4hn>}sRlWADNY*~a%1Fm-tXc^U`~+LJ(as$901*z4qHr5X zZ-Tv{rt0<0MH}Xdj;bKVZY;ZNnYrc^S%T=!!JI1{tl^!ds$4y!hik+P4By$Aqe^eE z6YQ%El|WBFM-1>_v8u4`_77S-r=GvECGy6weQf0Q#+PN3ps7LY-2P0E%j!vNb%;zx z46RblzxB;|>cnTqqbR$OKi4r-hDpK4gkCf`cw}s`cMNm26qYaRtbC>a_D*ND@b@o* zG*l6dYRY#iNEB)hnn>F7#_o+t;GO`@`NYEPx+SpTC)E(CG)AtXeF+~hUJn{9 z`Zhz%cQ3YpY!4_-vtk0c`8AR5Z)B#1D}8fvW|~{NKwsu33+E3J#;#~tgbBPZAK@u7 zcHw-lrMkfh(TaqLA1Sb@!yFL_-i)?!I{UUA8w=q{AGhG)(uw7W=~h^IzFUNCX-@w@ ziq*O|Id3#D4l{BssUaTI|2NtA;FGyQzkB4(^>&Cn z7yThS6EP1l@T#WQ38ZnxU+bfqLXnMl|f+>`hypKLIXGRW9=Ut_S6iCL5yRC7Btt zmy8W##WT#X$zv9lLJ|T~}Ty ze?riy#RXJ~Yqut5Ai+vG2o=L7)7HT2vh@uHY+r^eah=1EJg!C-!?pO0NLQx;zm(P8 zmPTd^9Y;*#1PA5zkmtRB4f;?x-u~#YPUZZyfl(6c)n~R_S<{t#fGD|Ch|}BYb>{re zA8OJrye+vIxZI+fjHm6#OADT@<@tG2e^E9M0;y>)KW3qoGiURkIKV!>>_p*0%3+nB z0IU=Yx@=Q}=@n7;?-;-W(+!Pv{4IAq)ayBa67nU+>u##qP&U7jBYljofSY3P-$&^s ziQPFe&Py-rXSg|Ao;amkq`;)Maz{3rFy@^dKUEj|ZEI_Y+*^7E$+@+p0?a+$&$wnI zF@vV$6H@7k7gooL8n%BKT21Z&PkiYdCcX~($z6ELxAB2mjC>Mde4?vfN2pAs?iU@x 
ziKOL(lQ-_hH9?9<6gH<$g3g`wKst8bh(=CpW-eOWl9nELU8?=PWj0On`cRE|?j1Pf zOpy|I4XaxZ=SK4@rZVJLs@L zQTYCV8=fGTzd1D~(2mxBu7YgE-?9bYZqiZVXA^=TZqkc+;1e>{nrj7C`gcEr{lei8 zP~9aW&gFo$}O#Htfc6e(Sa)D;o!pYgD3bCJX)U| zS`M2o)LjTCfH8Rxp5Iag+&_(Gg6hG=1Z`#!-MMm5OqW3QP59Bp%ta$?m&zEM&PH{i z_Al2&yZzn~6a2C5?en$UmCl-v&$_M#C<){}y&jcmHaOmCM`@aNpvSFU>D=&_QXft& z7_KXJ!+e_W_h*@ASa+r=?oC5kl`C1=B*2y|M#T8jYya-ORD+_sm*>M#r#pE=q%k;2 zM{@3Tv&>QiNc>jp6K<%Fp#+GgZ%B9VU|vn?6&JJ(w3v*Qqsu5u*I_WNf%^?gNEYWR z_~bM``pK|)KO{pS%k9IRS3F}SN?iDnq6*eCWz`}* WB|j0VI9CGTFAFn>Y3T*m=zjyeFz(?1 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html new file mode 100644 index 0000000000..78f0782570 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper-members.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + +
    ClearAccumulators typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    Epilogue typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmConfig typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmEpilogueTraits typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmTileTraitsHelperA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GemmTileTraitsHelperB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    MultiplyAdd typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html new file mode 100644 index 0000000000..1ec8904e71 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTraitsHelper.html @@ -0,0 +1,460 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef HgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > GemmConfig
     The HGEMM config. More...
     
    typedef HgemmTileTraitsHelperA< kLayoutA_, GemmConfigGemmTileTraitsHelperA
     The GEMM config for A. More...
     
    typedef HgemmTileTraitsHelperB< kLayoutB_, GemmConfigGemmTileTraitsHelperB
     The GEMM config for B. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The iterator to load A from global memory. More...
     
    typedef HgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
     The default transformer for A. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerAGlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The iterator to load B from global memory. More...
     
    typedef HgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerBGlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorASharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorBSharedLoadStreamB
     The stream to load B from shared memory. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The functor to do the multiply-add in the main loop. More...
     
    typedef ClearAccumulators< typename MultiplyAdd::ScalarCClearAccumulators
     The object to clear accumulators. More...
     
    typedef SimplifiedGemmEpilogueTraits< GemmConfig, EpilogueFunctor_, Index_ > GemmEpilogueTraits
     The traits class for the epilogue. More...
     
    typedef GemmEpilogue< GemmEpilogueTraitsEpilogue
     The epilogue. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef ClearAccumulators<typename MultiplyAdd::ScalarC> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmConfig<OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GemmEpilogueTraits

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SimplifiedGemmEpilogueTraits<GemmConfig, EpilogueFunctor_, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmEpilogueTraits
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTileTraitsHelperA<kLayoutA_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperA
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTileTraitsHelperB<kLayoutB_, GemmConfig> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GemmTileTraitsHelperB
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTransformerA<GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef HgemmTransformerB<GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB>::Transformer cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, int kScalarsPerLdgA_ = 2, int kScalarsPerLdgB_ = 2, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::HgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html new file mode 100644 index 0000000000..505f08a55a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..7846aeebf5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..092d948b91 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Convert<typename Iterator_::Fragment, typename Iterator_::Fragment> cutlass::gemm::HgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..4877f00119 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..8837b6996e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef HgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef HgemmSwizzle<Iterator_> cutlass::gemm::HgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html new file mode 100644 index 0000000000..99325439a4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..fdfc6a0035 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..93727e2762 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef HgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef HgemmSwizzle<Iterator_> cutlass::gemm::HgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..e655326e55 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..013566c2f1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1HgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <hgemm_traits.h>

    + + + + +

    +Public Types

    typedef Convert< typename Iterator_::Fragment, typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Convert<typename Iterator_::Fragment, typename Iterator_::Fragment> cutlass::gemm::HgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html new file mode 100644 index 0000000000..748a3a4955 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IdentityBlockSwizzle Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IdentityBlockSwizzle, including all inherited members.

    + + + +
    IdentityBlockSwizzle()cutlass::gemm::IdentityBlockSwizzleinline
    swizzle()cutlass::gemm::IdentityBlockSwizzleinline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html new file mode 100644 index 0000000000..68a70c7b38 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IdentityBlockSwizzle.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::IdentityBlockSwizzle Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IdentityBlockSwizzle Struct Reference
    +
    +
    + +

    #include <identity_block_swizzle.h>

    + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IdentityBlockSwizzle ()
     Ctor. More...
     
    CUTLASS_DEVICE dim3 swizzle ()
     Swizzle the block index. More...
     
    +

    Constructor & Destructor Documentation

    + +

    ◆ IdentityBlockSwizzle()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IdentityBlockSwizzle::IdentityBlockSwizzle ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ swizzle()

    + +
    +
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE dim3 cutlass::gemm::IdentityBlockSwizzle::swizzle ()
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html new file mode 100644 index 0000000000..bb78c951e8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html new file mode 100644 index 0000000000..cbcfef4564 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    typedef int8_t ScalarA
     The scalar for A. More...
     
    typedef int8_t ScalarB
     The scalar for B. More...
     
    typedef ScalarD_ ScalarC
     The scalar for C. More...
     
    typedef ScalarD_ ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, ScalarD_, ScalarD_, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 1, 4, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig.png new file mode 100644 index 0000000000000000000000000000000000000000..13377f8425277abc8597ac1ea80ae7bae781163c GIT binary patch literal 2603 zcmd5-c{r47AAUQeNa~D^ED`2SDNECc4vJ`)VKTDit573TVj`3ZQCdt{GGmz~+p(2x zXfl?OnM7kNC(1U~#*#FdB1?n$M*6;gzw7+>J=gU<@BQ4*@4c_;nMVYSfX#E?3tG(uRqXyXc6mJlE;vgvDG^x8|C%4z1v^K9+OQ zm--aT^}f@oc%Cr>&%-hCyjCWj*2Bb~l%jzusC~q9mim9`0Km~ zA4%&sD?F*7k&&AJwj0@g8+Z31ZErC8=<=0nR}>ZYuMtFw!IVAmx3;PhTa(6cDu!rN zq|YURabBa(j#@3J@lZIK1_Km$n1IfyOm$Y#qqg$Dw$Y6&>GjdiXSBr4 z0wegu66oxTEtnmox48xK*xVD}moB}|6EPQz%hcTaXlEk(UB50hTvWt0hQ35F$b9Op zgA-xWj1$Z6Shx`lU(@LBy^P5aug~sAHZy2%D6EZ;5L_~8Y_BoKC!Ig~ZMH+UVz|q~ z_Hi-!S%AS8dnaYrB=1u%T%FWC;yhBwqodtMQQ+kf^#s+CI(*<;0SMn}eyhG-)$Hzl zo$PCo81y`3D)IQ09_?aU$-r6oy9bjv?o9OaskHrDCEgkJ@)g@b+=)r<+P@7<$p;F< z4z)a%TR^v?LU3Qb!s?>27p(Ej-rA~aLzLVReRVgtn!Om+C%PFm1HD19xa{u+eu$vO zT%HI5Z+3Y|cHCRGuKH|EQs%4ODK?4iWOVn|zD%gK%R;FESr9x<%kj(VIXHWB&JkNJ zU~9pMcG^cxQpvTVdp~}+EA@I@nDH#NM|*DfpH7sgZlDj=k+?-RI4#*FJx9OAIOY?Ic1=f<*6XB#dHT1gw9NKPZ~R2WocK%~Jmd?z z+U!QbuIP0r99s{T&7^UuL9-N|FU}$I&<%DHxep3Oe6+uy34^otKuDyTS+sNbbeJ1a zdqd42Ng1UIs}b0u4yKatq0Fl$P$9fXT^N56OnyVMMGF;p<$U;5U_n^3-$flru&J|sXb!`=#6kwrQJ>=Qp@H(pllbEhMSGlPE5hloJ)2OM z(eb$~Bkqo`Kfb-0j5vvv5w&1`#N18(W>(Qfx#rV<FH-CK(-HY|k=1iY@NUh+i6Xl@mWrD1t-`qGJ5eIb7KEmapWIF zaOvE9?tkMe7WDZi2{)nx<1z6#9`m(7k4agLPNp;Lp>jS}%#(Rar^U zuFC4wL!?fxY`Vwn_$9RYKSEk5|KCDGs<}y6JKcsPr~A6OTb%85{W4eG$#Wz(4X|P- z>VH@l?zs45$()sn+f}j2a%9$jENz=9gEr0&eo7dwn`rXZh)Os4=6v*?L$oVZA3z66 z0pXkMyCcAZYOFubQK zUr>JRodxTdn*UXX#l}~!oud0BmJkPC0xjoRSt?srGCbh6aCGC#%J2%|K%*i-#dIx! zV5j5~#GLXN@I<1an!bXX=ZAOm7HBa%6H!M&c{Gx-Sgi$go*}Gpw-NL+g|q-XbLaIf z+iZ*1<*JcXMCmkp^mqsYo8DneuggN`C8;nDN@ zZTXju$Zalb1oY}4Ql+sd(rp7ZS*t-8D1oBlnMii%8%R=)N`HmbEItPbuBqG&<1dh! 
zN1L)XPwZQ3^zt{UQhCV{?4xQZha!~{zKT4to@&grLqF#Hsl8_SaY16I%1#JekuZF} zs?gR)3C!D2X{k;Jb#(kS{nsKxs>(P(xRgY}v^m=}Ol}$cm*N W)elt?vV2Ve$v`0-j+9uPzV&a$MyQDZ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html new file mode 100644 index 0000000000..0d364dc93c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kStagescutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kThreadscutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    Warps typedefcutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html new file mode 100644 index 0000000000..948f565c9c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmConfig< OutputTile_, int8_t, AccumulatorsPerThread_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    typedef int8_t ScalarA
     The scalar for A. More...
     
    typedef int8_t ScalarB
     The scalar for B. More...
     
    typedef int8_t ScalarC
     The scalar for C. More...
     
    typedef int8_t ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< int8_t, int8_t, int8_t, int8_t, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, int8_t, int8_t, int >, 4, 4, 16, 4, 4, 16, 4, 4, 4, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The number of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmConfig_3_01OutputTile___00_01int8__t_00_01AccumulatorsPerThread___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..584bcfd7fdddb3ee1f8a36832e6fb60d728fb505 GIT binary patch literal 2426 zcmd5+dpMNa8vmpwY?9G&nM%qasnCpDkzxjekYRFZh^a9nm*kpDX7406Q<=#vx7{>E zF6ofH8EK-3L}OfPP*d5I`!F+Z&DfJV=Q+>$=bZE3SCfyvD{*uA_V_rDvr{<%$q*NOU8&NsPXuO=ws|tn^ zt&MHw47zrde)e7b$>PEo71^Q#^_b>2L-j7?Pd3F|d11PM>P?NoUxz9X7@V3Dr!V)^ zpflqt2*K$6)l zls;IawMN&A<0X+N8>|Lj!t5v@KIiZo$Ju$Gl)?;mB{FSn=&Gk9L6q2qUAd87+i<)h zNtyBUbBug^OwMzk_E2R!p;TO-11a-6~ z>=@$Ey(^utIu#=Ze%C49~nCo5)3k4y^H5fsE6!LZG*=vW;SOnawCO@ zxAYGeIae;2be;%4QoDpMeyeF2bgH6iz%p=A8})P9_14pAIUH~Jgw|9j#}wa9c+Bp9 zFQ34!@rB!F%F&d-Is?c2cIlriuM7x+mJ_M6tI6Bl&(7VqtSmOn_eyesLrYTdjSU38 zz`{NGrn|@*YYUIhJMj0yRgl6q?V>7W2r1n$Wie$fN_X_|bG-aEne;*7k!mS230 zkyJ}aj^JGJj6R}7zH>o7z2YaQDVcVR28ecYdEi-exEyL##nCUv)mRT#zuz4>8U5PE|$cJ|kb0_P};JM%)*)#4fqwyr%hgI#`6ACHI- zYFpm_E!iTCC}1`>#twh<7B(J;Pmac1n|7~qxjP>lIOf!8v@&2hs~*)pg>3z}aEiNm zpD)6OqTwHlbm(=w6r4pJD2Bzq5UJWmRw5~U#fQ;f#q~4T`&N18irA%HyQ16IX47o{ zaD;w2P~dY8-RkVBtv9EIzxPXBkD!xuedauBMb%9G24giY3*Tg>Elzy3R7yxd+Ed+|A*2z{wJmBK*Il3rF6y%bOAL$ zx^G&MBxMMIm_Szz;Tv%Vz%S^4q89(Od3*JG66`}Jfr>px>3+8IqT-oLBB;o z>1k8l9ANhA=>XD(ZU0@=2BYBaajm$Zl*nZOac~xYH(#po?YU7w?K{Mm5|{rbn`}98y>bdfwX&?#Cs!}MyZ;N z%@|U?8Axfad36pqOEkm4W(XETPYZU%#6>jt9mr0};)U}1YZJ3riOaVxnNZsVZmIn` zq{fiK)qtm2ZI@#{q$0iaXYTq|CrsNuI&~*7X?sRR3Fx@wYJ0df8!V88LJut?qQ_b` z??aDtO89cjM4y&p9XCMGl{$_Lcz`w2>?-@aGL+pz;#QP3St z(v@Ujfh=!3+**+om8>hL47wg9?>49d6FxUqC5OI7v2x!yA#vbR{IoOty1y!sQ{UUP zlYoBW@>@{00%=@Qb{5ZF;XY5 zPgG^6O`6{}k7t>+T+S!%jq8pj>8@|5LnfYSWXJkhYO!+FiJL?#6mBeAU32f7?$sSjI99=>p>|M$xx#P{4+8%IiQ18|zwaJHsKF6$D$rTv1 z_-|U;e@{z1%qmb5z22ZpE_4B&K+-|XahX9KW(GS2*r{t*?8Uu!J}EJ~p7H8^nR-?j z0OFcULfJJMDL$I>dE9n&N=lw7{l7fL(LIoVD|ltGv|`L)oIV%g2uK5QnhKJ&|8 DkcUG> 
literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html new file mode 100644 index 0000000000..f3edab99ad --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSizecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kLayoutcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kMemorySpacecutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kOperandcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >static
    MultiplicandTraits typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedefcutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedefcutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html new file mode 100644 index 0000000000..3aaf681c40 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.html @@ -0,0 +1,254 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Base::Threads Threads
     The threads. More...
     
    typedef Shape< Base::Threads::kH *4, 1, Base::Threads::kW, Base::kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< Base::Tile::kH/Base::Threads::kH/4, 4, Base::Tile::kW/Base::Threads::kW, Base::Tile::kC/Base::kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef Shape< 1, 4, Base::Tile::kC > ThreadsDelta
     The threads strides. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSize > Delta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSize > ImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSize > Iterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayout > MultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand = kOperand_
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout = kLayout_
     The layout. More...
     
    static int const kAccessSize = kAccessSize_
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace = MemorySpace::kGlobal
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Threads::kH * 4, 1, Base::Threads::kW, Base::kAccessSize> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<Base::Tile::kH / Base::Threads::kH / 4, 4, Base::Tile::kW / Base::Threads::kW, Base::Tile::kC / Base::kAccessSize> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Iterations
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Base::Threads cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::Threads
    +
    + +
    +
    + +

    ◆ ThreadsDelta

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<1, 4, Base::Tile::kC> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadsDelta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..4a33ed8b59eeea27d4aaa5bf0c013a532100e6a3 GIT binary patch literal 2171 zcmchZe>BtUAHYAO#zYF0aYtpNRoqJ^y30yr6DvPLW%?ni6}jbCe(WkmV#VyDA+e!F zu84(XQFf`##gt#8E|wLkEHkt1PIbQDzwZ6>KIc5o^M0Q5exLI`=Q*#}c~bY`U3IjK zv;Y9m!C{@f06>kViuY-%tJdp|2s_ngxX<0kMX6M(!jE4jN?Rf`RMFSTX0!e8$M~v_ znh{=jZ$Oo_IC?vLY*bfTIOm-{F`%h}P|D0BE$15p`#Tjm@h9}2gW(PxuV794858bp z;3ao1HlG=+L)3*1>J8R~zEAX^=uxN{jQtsWD&sQTGMP~dN5D^6!%+z2I$YX1CK3n1 z9n}8Ot7gcVGVCOkV!I6++nD2Do_oCU;-hKdUEU%S(o}g43vRL7 zl>#=*>vTEy%K?hmMnR=*U6Vt)+m=hqCfv|mi(4sc~m)$UN52IArV=DbaMR14vqmKT08{q9li>>sc&4VgGgjwN0sKVIT2kFsg50>yc zE$lMk4({IPF|vgoX&JUGSHkLkXi72&6xDAmAbHr&UPBWID-?Wsh^#lac$=%EPF5b= zqHC|F&E?JQ*FEW#nvg;pIGkIxM{$w8d1ZW-qI4;l}JOO-X5jV z6fYn~5B-E_{VQ+>@#pL@Mv1Wgv?+Pq&BOF*mq6YaJyT@b3wIbP7DB_%-0G9KRO>Mv zx{bYOjuoXO{*|x0wQIAOP3bbv8e_2jGJtjhFVfrVj7 z?7(yymTMI3KM~Chx!HKSTnbf^HS+RC%zsOL+wj25f@(EzD}^)UK2P@CI-Plau)415 zOaApQ{Q)1+=Qt!&Nlf-mN?;2bg%Cj>t)#-`t8VGY4k`;+6#kR+uDII6W5UO~xS#YW zj1p_;Y$$qOio}7ET+qRr+B?#__FTstx5Op*G*iUdk8M#Sv@weK#N6>AWv>yodt@|j z8Q~i_Wtsm<4u(He7rN&=a@n9AYOnR@G%^4zP?5wi-=TKIazU0ev;+%mzViL})0TQj z2H+3Dc_c1jpTizgg9EE~dFihORT)xf03`8Q27fU<`D?n0Q;;5@!gR4j`)kQDEb%`r zdF{QSgzAhefx97tVfz3*Rf{T_{8NkSL)@R52%5;Lk>3L;?&iIKl4x z@q|9I{_T|OLV4Hav|Al_uzB)$6~qtTv>n)c`&Ps<#}FZ65nqoLG8LRjIvrqKG!>!aNhsG zcH`G39e!Fw0z@{{idS7)c5YEjD8cW*23`MhfMYD9I?q<=1$Sh1IpN#5y-# z60(4vd@68ycpgoxJ4{C2JwH~n#Zw($To~p?4-KA2?nZ~;x1MFU@66AAamvU|5Y9pM zf4t?M%Zawwn3YN|ojI%Fauyco$$ImADd)ttpoojEZ&A|;e!5Jk>%rOwJwtvLKa6e< zsA5Z@`%gG?AHrLflFyVfA=JeF+*sc?PJ+Ja0!Y4Ia?4d3XDC!NA$?r!-UIrX+jBOk zDMBRkLjJ_>Cp-Vhe$f_HxUzDY2EU@@7n4$^s5Afe#kgSJ7Ei!_@hKkg(a}@%`vFW* zl6^yCCv2e7m$U!Bx@6T3VsqP`P8%k6t~Jv2jhU9y7Rs#Q9-rNRdW%x*hcO z|IIQQ6h&0CoxpR=OF2mCME+yHlYVQClweS&Q)*d)b%IH!W?$_$cGn1|Zvx}1bUaNr 
zi%FEy9m97Dq6+$CVq(r$w}8z2T;{u-rhSd8^o(L{jpKv!kbvQ-M7g7lPW}QL?fvN#_+e zJ?Xn(90!uu_s{Ue#`LgZGZ-s4^eCaW5qXoK54(JY6h`s}Ct3wSL8aEAl6RnKWA%8W zPH@CMCGSw;@{si5@^?xRK9DYqNd + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..580eee0732 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmContiguousGlobalTileTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <igemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<GemmOperand::Kind kOperand_, MatrixLayout::Kind kLayout_, typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::IgemmContiguousGlobalTileTraits< kOperand_, kLayout_, Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html new file mode 100644 index 0000000000..73d16eaeb6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue-members.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Base typedefcutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >inline
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html new file mode 100644 index 0000000000..fdd93ee440 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.html @@ -0,0 +1,275 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >:
    +
    +
    + + +cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogue< GemmEpilogueTraits_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     We do not support 3D or 4D shapes. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmEpilogue (typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params const & params
     The params. More...
     
    SharedStorageshared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ , bool = GemmEpilogueTraits_::kInt8Output>
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits_> cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ , bool = GemmEpilogueTraits_::kInt8Output>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, bool >::IgemmEpilogue (typename Base::Params const & params_,
    typename Base::SharedStorageshared_storage_,
    typename Base::Index m_,
    typename Base::Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue.png new file mode 100644 index 0000000000000000000000000000000000000000..f10d072ef1b1c6280e0d7f3b4245320a95e21a73 GIT binary patch literal 1167 zcmeAS@N?(olHy`uVBq!ia0y~yU`z$F12~w0 z0muU50|yR7Y^(?aayd$Z{DK*Pia_A%sSqOu1{OI_7srqa#E(Obgtg0aQqUxkWlMBCFM6GtB(fIH*gWO* z9hH~iTI=VleEm|Fue2_$_Eufzcf)dzKe4AaA3gE>$2+fU9KXX3OrA2`Gv>>_`R}K7 zZvCIVq0;{T?%98@e|>bVqx_iFcGK0I)_S)Eet-S{H>l zi>vqCIOuxg{jpole_B7go|?b;L4e`5tDE^2d2EY39XKa;TD-v-vAl%yx2sQG+uFP3 z_)n?S&Azv*zgRrZJ+*fSw?rRv_Xcz3yYF8yU)-I2bL#5AOe>MCN;jW<`?6!-e2qEB z4)N+8+UNXp<*q%}RnPzbyjgbOSMjYmXI!(kt*y7uyZz?gotjR6)stHos^bms+1g(= zXwdTXV?H&h#5Meq*Ch|z_3cG0x4kD_cAoP3-_#}dS66O160|RIe@?IP+<8%_mrwG% zWNCVK-f6)ceNR9BFdk1Y&r6P;k&_lpouaB41!w4=x@_XRGIQ243&ua`zZg9jS+q2n zSzbR^H)K8nX0citGPeL(N3f6i(l6-iSg208RW$&!m7Y3uxtsBz*=O?_w@SV z`ez0tOzpQjFW7)2({YQS!2wW~_ePC<7ujGN3UbigEt~_k3)qdc3>a>hr)&2A3 zr}y0YZ+c_T{P%Z#eZQ*~c<-hX?G+M69YzRRA!x&HtA>Z@OCbMt2P=a+8rjGpo~ ze1_Y?%V&OD&fL9IV8=PB)>~4w+Ks2q`8{|3X6l~&r6K!jdHKz?UhX#!uH%2B!oNJ_ zyXo3LtTDf@96a|#PXFI+`_s=If8R}T?}(Nun|<@mQpTs}0}ED2+8^F->-ivxH~^Or*~)Ft$RHs``6C?`t4X$ z+10F&nS7>e8*=lm#B=@WJd~*;^SrdzHuL_%KLN4#=I%NDxpMNZuE6j9wllcD+`4`s zcXM_5XU%&|25&#t`rrE9weiDD-M+0^pV#)4>N0E%J;e}Svg`iL194~W_A}>H-};xe zzj}&wFx!Jtvx0&x41dD-Z|X-sX4^D7d*{|#Mm{Ua8$4EXSrS$xR5vVq7tYfla-du_ pwIKr;@70(d + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >, including all inherited members.

    + + +
    Scalar typedefcutlass::gemm::IgemmEpilogueScalar< ScalarD_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html new file mode 100644 index 0000000000..9ae00dbd48 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueScalar< ScalarD_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef float Scalar
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename ScalarD_ >
    + + + + +
    typedef float cutlass::gemm::IgemmEpilogueScalar< ScalarD_ >::Scalar
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html new file mode 100644 index 0000000000..b7179e432d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueScalar< int > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueScalar< int >, including all inherited members.

    + + +
    Scalar typedefcutlass::gemm::IgemmEpilogueScalar< int >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html new file mode 100644 index 0000000000..ab260d7082 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueScalar_3_01int_01_4.html @@ -0,0 +1,116 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueScalar< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueScalar< int > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef int Scalar
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    + + + + +
    typedef int cutlass::gemm::IgemmEpilogueScalar< int >::Scalar
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html new file mode 100644 index 0000000000..fc77b4b7b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits-members.html @@ -0,0 +1,107 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    kInt8Outputcutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >static
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html new file mode 100644 index 0000000000..16b5df0431 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.html @@ -0,0 +1,187 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ > + +
    + + + + + +

    +Static Public Attributes

    static bool const kInt8Output
     Do we output in int8? More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmEpilogueTraits< IgemmConfig_::OutputTile, IgemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    typedef IgemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef IgemmConfig_::Accumulators Accumulators
     
    typedef Helper_::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Helper_::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Helper_::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Helper_::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Helper_::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Helper_::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Helper_::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Helper_::Iterations Iterations
     typedef typename GemmConfig::EpilogueIterations Iterations; More...
     
    typedef Helper_::Delta Delta
     The iterations strides. More...
     
    typedef EpilogueFunctor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     We do not support 3D or 4D shapes. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +

    Member Data Documentation

    + +

    ◆ kInt8Output

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ = int, typename Helper_ = IgemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_>>
    + + + + + +
    + + + + +
    bool const cutlass::gemm::IgemmEpilogueTraits< IgemmConfig_, EpilogueFunctor_, Index_, Helper_ >::kInt8Output
    +
    +static
    +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..ed357320db770ca2f39e0d47e47679730950ec24 GIT binary patch literal 3581 zcmdT{dsI@{8h55~Y%;ZOW)IWqUX#kwv4?LV#~dR~v(ijcG)l{SlrQ9=G&<=trZ$jC z8BkgFP%~3RL8P)Id|+vUgh1xA5KvJO5V@yT)4kJLt-J2Jf8F1=&i?i}d!O%o`+Mwf zpL2Wtyp0yDT%f0?X9V4~bDy5x{A8fNcm6z}^`h-K1+e9NeGho)bUL6rHZ@ZEB;^9o z`*=Ni^eDJ0EeN=spS;g+zaB8@1G-xdYysq*hwk(^koJvKIGk9rd--=SJon#HVh1GG zc3iyWMWcvfiu>1CZ~L)L$#cqw=k+^rzry+hnyv1BLC#UztfA}vN^JWQd+5e5D1SvB zbl(@0*mm3(xBCl%{|v`%GJ<*IR&Bvx*0D()3cMMFqZVd}qAI&QRn&PIm|6k)CA?c! zB5@u_ec<>EHFUZ?J`FcI1J)duASW5&O3QI&y|hcBX{hs(|3Ig_!TIKE3}+ong_vie zszuyAmLVH*{b5U#@fWeq3UN)8CF9z`(KU8ZOu|DlO~+1_ysbWDO|rSyY7Dhz`KRN1 z^9^!e0J`2Xh!4TdzB&0DUE&Ol%D9?R_IO%w(F)RIJOh6V&60A8{Hd3A0y9*>N7YYu%U$c4aJ^x1xdx|L8e1w=HSe zMJpfw(KVI3XM0i$tqcv~UufTvZSO%$FIn*^Rr>4L=_TWxku#$cQiZ&d*$CFmq-c;Q z+?;h&%F%+r(}EKS8%_~>xZ3Xu5z+uI>)2lu0Kxiqg_+MSO8Uo}V?9uL9vx*qQr7;K z(+(s1j3Js`5F`;j^rq@j*8%Eko-0ioayZ8oCMoY4x>p<~cI%wF-jU;SyZ3p`AIb7( zC}a;&AAwxWA^T>9Ayi^ws*47_?8)>JY~qH7^|By(cj{!qu{jO5tz_9`rp3Uiw_0|s z*_==98Py~Fa9z4N*7hFNSYlcx@!8@YB?T28LnZ;{eOX7aUt#T?J-Q07fOT=^2!Ta( zDW?py!kX2*DmN^}RXE88nDPd_~A*NcFpw zVmeo-sV1bm_YY-;-aj+7pOk*(Xkx>LQEsL=?w!3!Z5@y4Ak{rsWXq7bk9!wAd0fX^ zkBHWSw(8tZ4)qzLgvN=OLdb(fLsiDi1L)Q`@YoWslzsLH$5AiRp$7u1P|Yl5P^x?B zluHXVuO)Y+@?qU=6(D=4N<~rJsm<8jv$SB7*G&Ij(#pB80p}h%Co}oQwr*lKJhNs&}#W^^!;<} z3x&g%06p>RTc_%G>k5v2h?AyelQfHgnTS7~$X#r8!cg7cG@zLXN_Bky_P?&!4tXc3}(q$cKyJAeA5pUq!*dpJP zN;O`Mx@&eT;ZP)yXcA*ZaRW0?xZ#x|mvQNCC9GsTo_NLmYDG-l{gc%}N`d=nY*3bM z%iQ~VS#RUo%a;B@%5Hp*F-m1m?AKNeRrWW>30oS10|W_uev{EL?Gq=2u8d%_Q1d8U z8{qxv%;Jt(#yY$EYuN)cnyxvRVWg4|rkaK?MwIx*GJJHwL(JHAH;jiJ-y<` z6Z>CdnL-}#U<>~Z-b!(|G~qdNvh`W~(zmNib;hUYGlMY@al<m9$eUzEdd(-y*4L@&&`<+3rc6rBa%k3}@Vgz$Ut+IN*u`8%kuo;!m zBh^Q2zDw=k(FF;6&`7Y{A00Md0-BrIm$s+Z=OUE#?rdB5Ypjg|_Ltq4zk@139;ZRa 
z4{f5JTs)I?14WFUXcqa|I*LxHD`?3i^h+>Xx;zg$VUU{RivvmXrZS!LjmCG$Sy90? zC~UcvYAfPrto$i-q8zrg_5^+)EaOlwtk@Zvs%lb0~#XvgG^X5Cp?=bNpNox>L^ zF>6BMjEH==Cq`-6$iy9Gd2aPJ*Fvgr0|jn~iiCwYD~KvEve*GC8Z3IIrCi1z<;m&S zbk5^~($MWF?*PUCGWI-U5-543skem>oqG(2oE)B#!BVSw3HDr}m7VfnLK-xRRk=V= z1D&vCZjx2DU$vrVi1oD{=U}cv`tLEgRa;l$Z{q$+1B90ZtHLjc1!(=hl;E!y_YZ~N zQvU-WP9C4|H~_T%6P!kpT|Pgyzuoo2f=@7ZPS5x}u=sX|-RC!kzWXQ+XuOHnKH<%s zC8nRSDkbwzl=$uagj26R3(o)7_x??d_y4y--!w)p&t91wdFN}aWZ`Ekw2h4${b{_M zg{#G%2cG|jfeuX?KA-7pa{fn5x;t2k&>)K$ekNg$Vrmq(5hzG8lK&09|8`@Y1thFf zsXF8UI4kWk+FT#-$*86t_r%0ETNxYdvk6_^+P5SON zX&^Bk{gCZe7EiDoc73ooIw8Np`TDhM5dxmWw}5R;oS3dqLhe>Tw_60SD$A_rjdf1{@@)nuQI_bJY(8ZpDnA3Z*K#fvd#P1z z5@UQlND&(cqbnm*?<$VB+K^JR&b9*@UW+)^gDLsIr9tzI;#oD#3EUAcjn_Ke-M%II zn84s+qODFmeFju2CMUvukS+>Z)5ce#!^-B~yxQ$E+vZub1_0-HJPvCb3N-|ldJ_LP zS^$){UESLf(N9>aLK{_5PDP+L-SFM0sCXcIN;i9HP|D1&vmK?1K3ebl*H@1wx)Ury zJz=S|6pe(zM_nmSiVa%>EH&1Z`8yDfW7U21In3;=Z6Ivjd$sr$IP5)7>ee(j4sWWg ztVAFj)K)E+-=L9%2Nl zqL6DyCK-F`w;KSTaf`0?+ySh?#(nmod*`Xjc0lqVNw|-}HtBeu#Na zlb7Uic5z<#-W3H#reOE7h5Q?tV7^P3C<}*PMmfk;`M2XJ7vs%U7G6 Z+;lJPf;{CG@UNx^_43 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalFragmentC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalFragmentD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalLoadTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalStoreTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    IgemmConfig typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedLoadTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreFragmentD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTileTraits typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html new file mode 100644 index 0000000000..555b0ce9b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.html @@ -0,0 +1,518 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ > Base
     The base class. More...
     
    typedef IgemmConfig_ IgemmConfig
     The config. More...
     
    typedef Base::Scalar Scalar
     The scalar type of the epilogue. More...
     
    typedef Base::Iterations Iterations
     The iterations. More...
     
    typedef Base::Delta Delta
     The iterations strides. More...
     
    typedef Base::GlobalLoadTileTraits GlobalLoadTileTraits
     The traits class for the iterator. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits > GlobalLoadIteratorC
     The iterator to load C from global memory. More...
     
    typedef GlobalLoadIteratorC::Fragment GlobalFragmentC
     The fragment that needs to be produced by the load iterator. More...
     
    typedef IgemmGlobalLoadTransformer< GlobalFragmentC, Scalar >::Transformer GlobalTransformerC
     The transformer from loaded data to math fragment. More...
     
    typedef Base::GlobalStoreTileTraits GlobalStoreTileTraits
     The traits class for the iterator. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits > GlobalStoreIteratorD
     The iterator to store D to global memory. More...
     
    typedef GlobalStoreIteratorD::Fragment GlobalFragmentD
     The fragment that needs to be passed to that store iterator. More...
     
    typedef IgemmGlobalStoreTransformer< Scalar, GlobalFragmentD >::Transformer GlobalTransformerD
     The transformer from accumulators to shared memory fragments. More...
     
    typedef Base::SharedStoreTileTraits SharedStoreTileTraits
     The traits class for the shared iterator to store D to shared memory. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal > SharedStoreIteratorD
     The shared iterator to store D to shared memory. More...
     
    typedef SharedStoreIteratorD::Fragment SharedStoreFragmentD
     The fragment that needs to be passed to that store iterator. More...
     
    typedef IgemmSharedStoreTransformer< typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD >::Transformer SharedStoreTransformerD
     The transformer from accumulators to shared memory fragments. More...
     
    typedef Base::SharedLoadTileTraits SharedLoadTileTraits
     The traits class for the shared iterator to load D from shared memory. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The shared iterator to load D from shared memory. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >
    typedef EpilogueFunctor_::Scalar Scalar
     The scalar. More...
     
    typedef IgemmConfig_ ::OutputTile OutputTile
     The output tile. More...
     
    typedef Shape< 1, IgemmConfig_ ::MultiplyAdd::AccumulatorsPerThread::kH/IgemmConfig_ ::kAccumulatorsPerLdsB, IgemmConfig_ ::kAccumulatorsPerLdsB > Iterations
     The number of iterations in the epilogue. More...
     
    typedef Shape< 0, IgemmConfig_ ::kAccumulatorsPerLdsB *(IgemmConfig_ ::Warps::kH *IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > Delta
     
    typedef EpilogueFunctor_ Functor
     The functor to do the math in the epilogue. More...
     
    typedef GemmSharedStoreTileDTraits< typename Functor::Scalar, typename IgemmConfig_ ::OutputTile, typename IgemmConfig_ ::Warps, typename IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp, IgemmConfig_ ::kScalarsPerStsD, 128/sizeof(typename IgemmConfig_ ::ScalarD)/IgemmConfig_ ::kScalarsPerStsD/2 *IgemmConfig_ ::kScalarsPerStsD > SharedStoreTileTraits
     The traits class to build the iterator to store to shared memory for D. More...
     
    typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedStoreIteratorD
     The iterator to store D to shared memory. More...
     
    typedef Copy< typename SharedStoreIteratorD::Fragment > SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef GemmSharedLoadTileDTraits< typename Functor::Scalar, typename IgemmConfig_ ::OutputTile, typename IgemmConfig_ ::Warps, typename IgemmConfig_ ::MultiplyAdd::ThreadsPerWarp, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for D. More...
     
    typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef GemmGlobalTileCdTraits< typename IgemmConfig_ ::ScalarC const, Shape< 1, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::OutputTile::kW >, Shape< 1, ShapeCount< typename IgemmConfig_ ::Warps >::kCount, IgemmConfig_ ::kWarpSize >, Iterations::kW, IgemmConfig_ ::kScalarsPerLdgC > GlobalLoadTileTraits
     The traits class to build the iterator to load data from global memory for C^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > GlobalLoadIteratorC
     The iterator to load C. More...
     
    typedef Copy< typename GlobalLoadIteratorC::Fragment > GlobalTransformerC
     The transformer for C. More...
     
    typedef GemmGlobalTileCdTraits< typename IgemmConfig_ ::ScalarD, Shape< 1, IgemmConfig_ ::OutputTile::kH/ShapeCount< Iterations >::kCount, IgemmConfig_ ::OutputTile::kW >, Shape< 1, ShapeCount< typename IgemmConfig_ ::Warps >::kCount, IgemmConfig_ ::kWarpSize >, Iterations::kW, IgemmConfig_ ::kScalarsPerStgD > GlobalStoreTileTraits
     The traits class to build the iterator to store data to global memory for D^N. More...
     
    typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > GlobalStoreIteratorD
     The iterator to store D. More...
     
    typedef Copy< typename GlobalStoreIteratorD::Fragment > GlobalTransformerD
     The transformer for D. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmEpilogueTraitsHelper<IgemmConfig_, EpilogueFunctor_, Index_> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Delta cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Delta
    +
    + +
    +
    + +

    ◆ GlobalFragmentC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GlobalLoadIteratorC::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalFragmentC
    +
    + +
    +
    + +

    ◆ GlobalFragmentD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GlobalStoreIteratorD::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalFragmentD
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalLoadTileTraits> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC
    +
    + +
    +
    + +

    ◆ GlobalLoadTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::GlobalLoadTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits
    +
    + +
    +
    + +

    ◆ GlobalStoreIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorCd<GlobalStoreTileTraits> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD
    +
    + +
    +
    + +

    ◆ GlobalStoreTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::GlobalStoreTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits
    +
    + +
    +
    + +

    ◆ GlobalTransformerC

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmGlobalLoadTransformer<GlobalFragmentC, Scalar>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC
    +
    + +
    +
    + +

    ◆ GlobalTransformerD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmGlobalStoreTransformer<Scalar, GlobalFragmentD>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD
    +
    + +
    +
    + +

    ◆ IgemmConfig

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmConfig_ cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::IgemmConfig
    +
    + +
    +
    + +

    ◆ Iterations

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Iterations cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Iterations
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::Scalar cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD
    +
    + +
    +
    + +

    ◆ SharedLoadTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::SharedLoadTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreFragmentD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef SharedStoreIteratorD::Fragment cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreFragmentD
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal> cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef Base::SharedStoreTileTraits cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTransformerD

    + +
    +
    +
    +template<typename IgemmConfig_ , typename EpilogueFunctor_ , typename Index_ >
    + + + + +
    typedef IgemmSharedStoreTransformer<typename IgemmConfig::Accumulators::Element, SharedStoreFragmentD>::Transformer cutlass::gemm::IgemmEpilogueTraitsHelper< IgemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogueTraitsHelper.png new file mode 100644 index 0000000000000000000000000000000000000000..5a6a369859066ce0cab64b72c2132cad7d0b7438 GIT binary patch literal 1613 zcmcK4doye5>yX_#4rsI()hC+-OV@%4*qm@C+*tL>kG~}37J0`O< zBQaiiKbCe3i*h=V0GJGl@v3lW>^|Y@3Qha)9z>U|tt}{A1s6*@uH1p5f4Wd8JYOI03oTV+ z2{<1BI`p}X_Yo}sVB0gS^GRYnY&q{q#*5>-oEd$>Pg_PaxJWIA zOWyOQ{S9Wxp?=!Y!I>pg)W=i!BUrS$=nV6WGRU0QIN643?o5@fX=AUauDv(HTDJ0a z>0{+pNi`w#30hwvN^h2M_k~rGc(6uFn$U5xh#{F)^qigJyKhc)WpWf#5u(fvR>gs` zpGHfV#OK-+W_4nYjQh&BHGjtIhXYS%DqlPtoG_L_=64-aw3LjO*c*0I6Gete9r6hE0l{|)YZCp*a$kUOT z{kjlAAq49Aa2S?{*tTS*PK33TSeF}9mR&FMuNxHB9kkA_k&Hy3Dp+def?U7o)l}In z8?5iUekaKiXp8QMl=wOitdX_Ke}0NAS6sC4dc{+8XvyWG;a&P-St;3w$Dhu)xqlM9 z*8o{^@55fw>$j(^(50dvG3W_1l-ElY@CD2aIM{dKZsRn_6a~-AR7wMBElJaTQbrX; zEJ`5Dh0nOwxhP~PkF&HqRvSmg5oniH6!&JQTMt*D-J{3&M0SgYwXscl1wNem z@1qbD4r|zdE`_$HXwi=wue(w`BE#D%+Xx?6wXmnA;7ctzTm&`K(!Zd`WkC@wftlRb zp=>>!%9lS+&bs|f~efh2vytVE)VsjQ$;8Z_e{GUt|O%b+fpmM=*~MPkBcVhoyEp4{ThWlsr%S zo@}7uf@lPns|3W-kOL2fTG~e1V%>^IXn+ZH^~@$!DHp((j7r_d%qqSC&#GkYUDnE5 z$7)7q1*Et)Zaen6NfDHA?~9E9*pQm7XA5WNw1j`(FA#i`7On|Yd#D{<)ErxL6}=kU zB?-ml7G$nCT6U59@_f4q(QVdn!L3PRW94JIlekzxCbhL|N=z33Q~82j0biW|LT4at z#V%Rdw^qBnHiF_%Y3qQ_p<4;SvXD2r|H^pG;|FBiRG(bY)_~{Kc)R6YnEa)?D@)ej z!Z#IOv$kFNwR!j|<>5OTH|nar`YX|J9HNllmO7UA}e_8wU{Ka*?RD_ph zE&{%Ihnwt + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Base typedefcutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >
    epilogue(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    epilogue_with_or_without_beta(Coord< 3 > const &block, Accumulators &accumulators)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    Functor typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GemmEpilogue(Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    IgemmEpilogue(typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >inline
    Index typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Iterations typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    mcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ncutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    OutputTile typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    paramscutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Scalar typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarC typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    ScalarD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_load_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    shared_storagecutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    shared_store_fence()cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >inline
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedLoadTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStorage typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Traits typedefcutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html new file mode 100644 index 0000000000..37e7daa84f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.html @@ -0,0 +1,275 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >:
    +
    +
    + + +cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmEpilogue< GemmEpilogueTraits_ > Base
     The base class. More...
     
    - Public Types inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    typedef GemmEpilogueTraits_ Traits
     The traits class. More...
     
    typedef Traits::Params Params
     The params. More...
     
    typedef Traits::SharedStorage SharedStorage
     The shared storage. More...
     
    typedef Traits::OutputTile OutputTile
     The output tile. More...
     
    typedef Traits::Iterations Iterations
     The number of iterations. More...
     
    typedef Traits::Accumulators Accumulators
     The accumulators. More...
     
    typedef Traits::Scalar Scalar
     The scalar. More...
     
    typedef Traits::Functor Functor
     The functor in charge of the math. More...
     
    typedef Traits::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Traits::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Traits::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Traits::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Traits::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Traits::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Traits::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D in shared memory. More...
     
    typedef Copy< typename SharedLoadIteratorD::Fragment > SharedLoadTransformerD
     The shared load transformer for D. More...
     
    typedef Traits::Index Index
     The index. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmEpilogue (typename Base::Params const &params_, typename Base::SharedStorage &shared_storage_, typename Base::Index m_, typename Base::Index n_)
     Ctor. More...
     
    - Public Member Functions inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    CUTLASS_DEVICE GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Index m_, Index n_)
     Ctor. More...
     
    CUTLASS_DEVICE void epilogue (Coord< 3 > const &block, Accumulators &accumulators)
     Execute the epilogue. More...
     
    template<bool kBetaIsZero_>
    CUTLASS_DEVICE void epilogue_with_or_without_beta (Coord< 3 > const &block, Accumulators &accumulators)
     
    CUTLASS_DEVICE void shared_load_fence ()
     The memory fence for shared loads. More...
     
    CUTLASS_DEVICE void shared_store_fence ()
     The memory fence for shared stores. More...
     
    + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Attributes inherited from cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >
    Params const & params
     The params. More...
     
    SharedStorage & shared_storage
     The shared storage. More...
     
    Index m
     The dimensions of the GEMM. More...
     
    Index n
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + +
    typedef GemmEpilogue<GemmEpilogueTraits_> cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::Base
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmEpilogue()

    + +
    +
    +
    +template<typename GemmEpilogueTraits_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmEpilogue< GemmEpilogueTraits_, true >::IgemmEpilogue (typename Base::Params const & params_,
    typename Base::SharedStorage & shared_storage_,
    typename Base::Index m_,
    typename Base::Index n_ 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmEpilogue_3_01GemmEpilogueTraits___00_01true_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..5b7af1e91a831174d9e597a3b953df02c8b884e3 GIT binary patch literal 1176 zcmeAS@N?(olHy`uVBq!ia0y~yU`z(G12~w0P@kSwWXgd=rx_|#pZuO9t@2On@y( zi*B7ay}4)p{JVz>bN22reSG)vzD?EZ)z;qIr1-FX5@StLLqP&x#eo z&;Hu5e}CYANFzViNc8Q+7s>Pg`{^C1d%xJQa=)tT%9V=A?N$E|uKQ5n7?ojtI5vIf zmHp>VU5hWv68zS_^BY6IW_U0Z^VqgMn&tL%`oZDk%*f zAl6<1^^^t&AS**Pu^|FT(-i-0@A1}D4fWDYT*R=1p=nAFW71S4d^PdUuaDo~_*>Mx zySw}Du44@Bv0LLm)t*~%ap%E**~b|C!gtLrcIVBzv0X?mdC!{ri@tyUvt`Z7zcG`~ z{{DH>_Kot7NjpmS?O4A*Pj=Rps?)RP&D$Jre}8@0_3!1c59sLI@!U=>>3J*R6Y!#? z(%%1|ZH!0E^`1j=z1x2ODK9EBl;3p!cg7jHf{!;{ze?PlWpRw}>%ZpM2M?QMjs=y* zJ&>6H>w4s^U#I(PZ+ivID!lp6;HF^0f{MsvdHOvP^?D7f!g8e3=Q=VL=j_iM`EykpfB&x7@%()5mSbM_x%p{THkbBB z1%8xxmmI#-O?2g|RMwmwmi3x%)VY>zESU1TDB^)Y_*?59_OCl0U0i#-{r0!w@Vx(P z*M6J4`TkE89O*7G@m;VWg|Wb{lkJRmwZuUu{8^y2b@ zhTi3G*Kgf@{6_o&2HCm3GO-#A`>!~EJH2s7&zom3_>>89&q@c r&FaAQVD= + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html new file mode 100644 index 0000000000..35a75abea8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmFloatToInt8Converter.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmFloatToInt8Converter< kElements_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< float, kElements_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< int8_t, kElements_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmFloatToInt8Converter ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<float, kElements_> cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<int8_t, kElements_> cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmFloatToInt8Converter()

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::IgemmFloatToInt8Converter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::transform (InputFragment const & src,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<int kElements_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmFloatToInt8Converter< kElements_ >::transform (Fragment_ const & src,
    int offset,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html new file mode 100644 index 0000000000..0dc73a822f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html new file mode 100644 index 0000000000..43ba1df7dd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< InputFragment_, Fragment< OutputScalar_, InputFragment_::kElements > > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputFragment_, typename OutputScalar_>
    + + + + +
    typedef Convert<InputFragment_, Fragment<OutputScalar_, InputFragment_::kElements> > cutlass::gemm::IgemmGlobalLoadTransformer< InputFragment_, OutputScalar_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html new file mode 100644 index 0000000000..1e2db95279 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html new file mode 100644 index 0000000000..181687128f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalLoadTransformer_3_01Fragment_3_01int8__t_00_01kElements___01_4_00_01float_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef IgemmInt8ToFloatConverter< kElements_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef IgemmInt8ToFloatConverter<kElements_> cutlass::gemm::IgemmGlobalLoadTransformer< Fragment< int8_t, kElements_ >, float >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html new file mode 100644 index 0000000000..7d045266bb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html new file mode 100644 index 0000000000..134180f5a3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputScalar_, typename OutputFragment_>
    + + + + +
    typedef Convert<Fragment<InputScalar_, OutputFragment_::kElements>, OutputFragment_> cutlass::gemm::IgemmGlobalStoreTransformer< InputScalar_, OutputFragment_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html new file mode 100644 index 0000000000..4a2355422e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html new file mode 100644 index 0000000000..4d6a68c148 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmGlobalStoreTransformer_3_01float_00_01Fragment_3_01int8__t_00_01kElements___01_4_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef IgemmFloatToInt8Converter< kElements_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef IgemmFloatToInt8Converter<kElements_> cutlass::gemm::IgemmGlobalStoreTransformer< float, Fragment< int8_t, kElements_ > >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html new file mode 100644 index 0000000000..0e462c7366 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html new file mode 100644 index 0000000000..00a382f7c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmInt8ToFloatConverter.html @@ -0,0 +1,265 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + + + + + +

    +Public Types

    typedef Fragment< int8_t, kElements_ > InputFragment
     The input fragment. More...
     
    typedef Fragment< float, kElements_ > OutputFragment
     The output fragment. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmInt8ToFloatConverter ()
     Ctor. More...
     
    CUTLASS_DEVICE void transform (InputFragment const &src, OutputFragment &dst)
     Transform a fragment. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void transform (Fragment_ const &src, int offset, OutputFragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ InputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<int8_t, kElements_> cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<int kElements_>
    + + + + +
    typedef Fragment<float, kElements_> cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmInt8ToFloatConverter()

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::IgemmInt8ToFloatConverter ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform() [1/2]

    + +
    +
    +
    +template<int kElements_>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::transform (InputFragment const & src,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ transform() [2/2]

    + +
    +
    +
    +template<int kElements_>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmInt8ToFloatConverter< kElements_ >::transform (Fragment_ const & src,
    int offset,
    OutputFragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html new file mode 100644 index 0000000000..dcdcd512a4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html new file mode 100644 index 0000000000..089013d38e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSharedStoreTransformer.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_epilogue.h>

    + + + + +

    +Public Types

    typedef Convert< Fragment< InputScalar_, OutputFragment_::kElements >, OutputFragment_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename InputScalar_, typename OutputFragment_>
    + + + + +
    typedef Convert<Fragment<InputScalar_, OutputFragment_::kElements>, OutputFragment_> cutlass::gemm::IgemmSharedStoreTransformer< InputScalar_, OutputFragment_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html new file mode 100644 index 0000000000..801a953011 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html new file mode 100644 index 0000000000..6f2c5963fb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmSwizzle.html @@ -0,0 +1,273 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmSwizzle< GlobalIterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_swizzle.h>

    + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GlobalIterator_ GlobalIterator
     The global iterator. More...
     
    typedef GlobalIterator::Fragment Fragment
     The source fragment. More...
     
    typedef GlobalIterator::FragmentShape FragmentShape
     The shape of the source fragment. More...
     
    typedef Fragment InputFragment
     The source fragment. More...
     
    typedef Fragment OutputFragment
     The destination fragment. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE IgemmSwizzle ()
     Ctor. The src/dst must be int8 fragments. More...
     
    CUTLASS_DEVICE void transform (Fragment const &src, Fragment &dst)
     Transform a fragment. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::Fragment
    +
    + +
    +
    + +

    ◆ FragmentShape

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator::FragmentShape cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::FragmentShape
    +
    + +
    +
    + +

    ◆ GlobalIterator

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef GlobalIterator_ cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::GlobalIterator
    +
    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::InputFragment
    +
    + +
    +
    + +

    ◆ OutputFragment

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + +
    typedef Fragment cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::OutputFragment
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ IgemmSwizzle()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::IgemmSwizzle ()
    +
    +inline
    +
    +

    Ctor. The src/dst must be int8 fragments, and the number of elements must be a multiple of 4.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ transform()

    + +
    +
    +
    +template<typename GlobalIterator_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::IgemmSwizzle< GlobalIterator_ >::transform (Fragment const & src,
    Fragment & dst 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html new file mode 100644 index 0000000000..e26b4c6160 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.html @@ -0,0 +1,101 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperA< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA.png new file mode 100644 index 0000000000000000000000000000000000000000..d055e0299931442f5602c239c13e878785d5146e GIT binary patch literal 1401 zcmcJPYfzGD6vtm}YfULEH^a(VD~P-mXewp1ZD_*lZ4F5s*So2k#A2afi9r`})HaoL zw7lh*y5%aeikBSa)%7y!O6IN?aKtGUH9~9!6U zw?>I1Uw=Tq^v4?=AUgoSRwK~;EIHXgS)$}Aj>6pG%BSlevRP=X>Dw6BD~!grO6%*d zenjQqi0@-2o9r8@lX7ck&=};E8Qw;=+G2>jwvS+r7@-qK>?_O8fWZrIwULp5wyJ;^ zZ{U2@ptcX*UW4>;ayr=`b4f6d20zsiu&1kWrPVB9fMa$u)|)3vdwIH`wMR&0MNXVw z8X%kyr2w^?`@4J4{*>p!)L?1;#8A9ua3<*r>-`u|tNo?19Zfbe;9oNpoovmHrvT4rK#)pwt)!$aE`c9DuXM0LhAUd;+k1Ap%(2d(;YO(U1Pu zC^k6wul7!4sCza@tr_*~?Kr+iS`?~+Si@8ejkJ2c$sQ{Rj&Y&386w(~TMMFSqAG^fMF z*qk8P%0_F%O2TM;rysrFwf$~{kx)Zx8ZRyDiYrZcw`Jl5dB$c@76ofi1o*>z68L+g z?T;4l?)tOjq!4Qn^ z7W!Pu7BCPgV#Q-r*J|)Wv!m)dRoHY&r%tcY#)!m~z7FO=lI@)HglS;x*_(HZhQN3t zEiGC_^Vp=qvevgf8jW$|WG7M5T2~q_?u28Y{SNsPF4fO#BAxd@j|RCw*=zA`k*O@L wWbF`*V+yV7Wz@_1PjsC*B+v3Z($!zX#hHE;viFCx^mh^f&-uF7;UjMS4xDVCzW@LL literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..6a93f43b2e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..27a96f2a98 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,218 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kW *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarA Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgA > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for A^N. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kW *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsA > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for A^N. More...
     
    typedef GemmSharedLoadTileATraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsA, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for A^N. More...
     
    + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerStsA = 16
     The number of scalars per LDG/STS/LDS for A. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kA, MatrixLayout::kColumnMajor, int8_t const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, 4> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kW * 4>, typename GlobalTileTraits::Threads, kScalarsPerStsA> cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kScalarsPerStsA

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::IgemmTileTraitsHelperA< MatrixLayout::kColumnMajor, GemmConfig_ >::kScalarsPerStsA = 16
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperA_3_01MatrixLayout_1_1kColumnMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..e13efd743b799e5770b533ede944e8708cca4c27 GIT binary patch literal 1674 zcmb`IdpOf;0LOpg9IQs?=yB<|oEY7nkj%B29=il=_&-=dL_j$h0^ZxVsyl>_noI4b<8Ug?S z6zk!F2LQEsFz?Y)2UkbyuuI@(xW{v!t4gH;r;sNle6^F;| z1;EljqyNP|I{^6E2mq zF&iEuJTsif2x41dwXyyh_n5Hi^L>Z&2X>OIv7u|=Mdh`&Jx!>qFa_k(q$ahT-7ygW`d>wJ2PW8;Wr$i-HMH@UeT)dX9pUlT#9DOis+SiCFD*+iEbWo9@f zS0zO)ro!6$Pnu)7GqgtMBNLz7cjGfC?*p^O?U8W7MZB?~&vPp@N4 z@bwH17LPLOrB!zS;RYl}5np*b>Xkf?xN$*7 zkmI;Oan^)6(T|Bq?_AEgph}&gqf0HVGgjNKnK-=m7cOmDbeYB4@5`z3NMQC*js?Rp zx8ef?TO0WNaFXJBnT?b#@W;7x6#VHPloqCciB@l5k9l6-*Hl^>UG66p<4bu=%G5a- zeG6J6aPgj52%3?8x9+;cBqW*38>eZqoCc_F!}2MnS&q+Kur^%kNTlGMQ zsf}ftWiGH3^~P{0CAE9UV759(;bONInN&@y3po)Pz5UXa`==d%P{aKG-L0!H1Z zlKRag#GP||G>8nQwWA{cFN4`uPc+4&MUE_E1^dAw-rS0zZU+ksw7@KN9SRuf`Q?nEe zww(t@?P^;oX1ybO*SA>o5sW**c))ps&O?2CY)Wk|O}*-TpD|%mxex$();zfU-Zr*B zSOE$D-zxe8O@;VhZx=pVu@2BqasBU|dYqOc%4Yp3@;bS<0loS9?Z=p6!5yDY-388P zQG(_lfe*!RZ%o_j>b@b~1a3Nq)f8ykcD3}QVCD%+HNw{CI{7Pn%e2|>CSeYy8#G5f zEsC;i>r;Jid_?6!)Yntw!k0d~BxGM)|M|7%Qq}?R6&- zMFWAr3BTSPD0!M>{CIN=(I$P{^fhm<$+(McWMf`jmy=WU*^Z==>y!LuSzOr}NbyVR z84ZzX30!0!8N7{|xy2AKRv?*SHMGEoU7HBL&)huSe-wxKU7V9>7${EnM5UD@H?)2E z58{mKhUN?(6)Fo2XoptY*qbSJ+Zw+$ESOHh`yGPzO)kG46PtW&g+BcC_Ab%uy5YK~C{?oT`nr)=j7iyWH-!4APGERMD&Y#5 zHv5mR$P>Dc{>GU+wiISKxPicZ5|l=(3%sF+a8t-og7K~oyeXgHUGooE)6ed3j3IC1 zgFnYaa=Ijv4>5fx5frT%v_;2WYVq7y{IQnA@4Uw9P(vJ`O9rzumwlJULB?2&ecyR^ zq!@Baesde!d*+GYz8X-{u4zGaNcyJDq4}sIKq4w!YsL<3FxJl0Jgqg#-RjR(s=pcQ zpqPgLWIGz-oZ>rMv+|5@ObTKwU&5szw=Ww7)%iE + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperB< kLayout_, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< kLayout_, GemmConfig_ > + +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB.png new file mode 100644 index 0000000000000000000000000000000000000000..27dbc2cd675ac84dfecdcb202e60d1e0be5f74a3 GIT binary patch literal 1400 zcmcJPdsNZ~6vuz0mSN6Slr*(7gB0g;K5I@Y9|I{mf(cF+0V^SSrjKR@UGf)5Eag?<4A z0KgQB@ecz4NV%Tz#s>O+EB~B+0l@LW!~mU6r^l(evEN1H(|URzp->pzlozQVjgN$d zgai7cl^PHeT>-#o4c31TF%L3vexjmu$GU(}r`}c_LIuNQwHMJRqg$p?M^6Yt#+fpW z#Lt9 zm?BmX8$Ws;5d>P{5a=d8O#Iim#1u^2tS_CSPjpJpSp!sR zaKyoHb0}4!=Y0rh&02`t-OXNJob*m;PG139ebkvbQfRbealA+2*A^EKAC*aS{&m%O z#Z`pfRldUR*ZK7JWAAo*1V>U=WG&#Y2inBV;O9s;Oz?3{yHm8!Gmi!oE&kmp*501- z*u~U9x5{oZJ#)rDMmYP@LH_8s%ByJ+e$a@lu9NMc?L9@O=Y6MQt6JC~g6Tz8X~>u8 zC2i*EjTyzQSWc2hvDIS=#I1Vf>zXUIy>e={T)08@aQjZLYyV}cn8gaa^mHnZ6C;6h z&tBM);OJg2;6B;4&hwAUb&Zosf2o84+h0fFZ60*;zSNF5-eUDeG}VeYmg_vL4l4A1 zfT9H%UFl5Rv34QZZHw0{3%#j)hPU3-rNaV-MaPjM!_Mp!+3iGVf9^%cyW};Bm+sV2 zW8B);-GfgXW_gx_E{l_q=y7=+4ZAQcwt(~~$1Q~M&02H<@SL@kmo>R0ecpjf&Up;< z-^?(cYGO6Gyh|_{otG8zI5zR`fc-UgEHXnNIWq0 jX&yP3)54%!8pvLTxXJOE8VR`aQ2}f~h<~H+cP0M-U=5I# literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html new file mode 100644 index 0000000000..c4c05c5827 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html new file mode 100644 index 0000000000..de98c371d8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.html @@ -0,0 +1,218 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >:
    +
    +
    + + +cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ > Base
     The base config. More...
     
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, 4 > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/4, GemmConfig_::OutputTile::kH *4 >, typename GlobalTileTraits::Threads, kScalarsPerStsBSharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^N. More...
     
    - Public Types inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
    typedef GemmConfig_::ScalarB Scalar
     The input scalar. More...
     
    typedef GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar
     The scalar stored in shared memory. More...
     
    typedef GemmGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, Scalar const, Shape< 1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, GemmConfig_::kScalarsPerLdgB > GlobalTileTraits
     The traits class to build the iterator to load data from global memory for B^T. More...
     
    typedef GemmSharedStoreTileAbTraits< MultiplyAddScalar, Shape< GemmConfig_::kStages, GemmConfig_::OutputTile::kD/GemmConfig_::InstructionShape::kD, GemmConfig_::OutputTile::kH *GemmConfig_::InstructionShape::kD >, typename GlobalTileTraits::Threads, GemmConfig_::kScalarsPerStsB > SharedStoreTileTraits
     The traits class to build the iterator to store data to shared memory for B^T. More...
     
    typedef GemmSharedLoadTileBTraits< MultiplyAddScalar const, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, typename GemmConfig_::InstructionShape, GemmConfig_::kStages, GemmConfig_::kScalarsPerLdsB, 0 > SharedLoadTileTraits
     The traits class to build the iterator to load from shared memory for B^T. More...
     
    + + + + + + + + +

    +Static Public Attributes

    static int const kScalarsPerStsB = 16
     The number of scalars per LDG/STS/LDS for B. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >
    static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor
     The layout. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::Base
    +
    + +
    +
    + +

    ◆ GlobalTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef IgemmContiguousGlobalTileTraits< GemmOperand::kB, MatrixLayout::kRowMajor, int8_t const, Shape<1, GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kH>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, 4> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::GlobalTileTraits
    +
    + +
    +
    + +

    ◆ SharedStoreTileTraits

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + +
    typedef GemmSharedStoreTileAbTraits< int8_t, Shape<GemmConfig_::kStages, GemmConfig_::OutputTile::kD / 4, GemmConfig_::OutputTile::kH * 4>, typename GlobalTileTraits::Threads, kScalarsPerStsB> cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::SharedStoreTileTraits
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kScalarsPerStsB

    + +
    +
    +
    +template<typename GemmConfig_ >
    + + + + + +
    + + + + +
    int const cutlass::gemm::IgemmTileTraitsHelperB< MatrixLayout::kRowMajor, GemmConfig_ >::kScalarsPerStsB = 16
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTileTraitsHelperB_3_01MatrixLayout_1_1kRowMajor_00_01GemmConfig___01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..0703b778b190c5ec1f9d34b079b7cb97882ed86e GIT binary patch literal 1634 zcmb`IX;hL~9EV@uk`!gsP%_uiP;$*t3mZ{S<`V8(W@sl{j^-998MvmUH60BVOj{)* z+9S6VFp_N2Luw{du9Qq>R#KuUD!I%%&BwmZJ?Gy0+3F(6j~)6C=$|}oV?@l1Vd!uUnzJo(IRgdopf@lcNDd4 zWcp}y&q`R^_vuczh^DGz+FB$lV60NG6D0edb*vT^Z5dI{0^}(ss- zO_;%oQlC_HMOMKI1t;j;cSk%oEZoljV9UJ1YCsRY3BH4Lw5>I>+{1Ltc3w&n7cR4k`y{=3-HZ{3VljK?-7k1G!U}n!Wb2#(SMEfx6 zDzQEMba)%gLGdbglJd6qldK;emq6z+&0GYBE=xlDjfQeaJrOnsaBgQ%^bgnI_IR(S z_%OW6e3}@DL94d`u=SJ+u zXKPFlB*eadt!p!~s6#uH>c-Gfej4_8txapIZWK>j$ssM{N1D^{z*6Lbx-Jldv~Uvg z7v}8rVKJy5D8@o)0HIhTOAR`!T?ZLkP^<^8u`ks%jE5Wspvzz}hEN2u`u~b_cwtBZ zHtG=pNx|g+2-sQv^n^8dLa>zg_rpy$mIrRQzw3E1Wd4QX)n&*c(4Oog71*e}ulP)e z078&=96{Et);*o5e*CQ5^n`7l!iIeKwoxuo1D9^hP~%=G<~C3nqwVs0Ri=y4P?-e8 zrYHmwq0%;?MC%#rID?bBn4jmCzq-YMps1b-KR~OZWLlDFE$(}A%1SGYax??3o7^AH z6d8G^C7rI*45ax!Sb1+*O)R$Cl~8fPUa@?>E{i%)_g{_dajN`LBebD=?4Fnr>86dI z#?8|8eB5?XYH@zxg}$NK(zR~>S-RXILS7&CIm2OoJkGdALo%xGG7FnFR>_W-D3d9Y z?54{&y}<@5`jTiA-MGm2qogxAI%e_Fl)H(b`SY@<@l#~|Dj}!vI)X!5s8q>izp zVIAwvsBR*gI>p-M;ZJIPu^trt!KA*->#ZWkwKes#gNSdKOJv&=#8mSqt!D~U9uZd7 zDA63uHo>mMNuA%eMt(+97(B3=?3S41=8H{`d%~2W{G~jB-HN|X*IZ@aj)Q!TcI$9p zY`w$pqeZM=SosLT$J)jIDL%7r=z#%O+luVvk(}MRjmNyd)!DW>m0T5|IrenB8_Y0W ziHN7QEPt~Gs5;Cp`QgJ;kJWI6x72I8GYe2eV^Gr3>5Mbt5?Mg#uN3<7x>wSV2&K20 zHg!Z->gaj?JwpZJiXv%KEk_Cf;r??!8NIOHja>=au$#~;Ysk(ACl4@#(9nkf;C(}U JINtFm{{coa^Hu-= literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html new file mode 100644 index 0000000000..23f0fe6d06 --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Epilogue typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GemmConfig typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    Index typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    kLayoutAcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    kLayoutBcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    OutputTile typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarC typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    ScalarD typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html new file mode 100644 index 0000000000..92ff0ed8ab --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::IgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    typedef Helper_::GemmConfig GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Helper_::Epilogue Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Helper_::ClearAccumulators ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..7f98448cc867fef1d0174483b86d238631451982 GIT binary patch literal 3096 zcmdT_d05if8pbTwv}w^yQ*VVfT56+sHQbs>R4~ZYN-fFJkkH)01($5B%q17xMjez) zK~2$I6VqtQ6>%e1GRs6Gv|KS1^itEzz0Y(1y7S*X&vVZAp7Z;@_j}I!Jm>u-I3R2l z!<2bOFa!dX|zn7kekJR_HkF=T<@-g#t<%&)*MJ3>0}2-Jp<3{;54xrxI8s3GAh z5Oc7CB@W*{zw;nwf6F=mRh8z|eaq^y8#KjY!4`Lb#KRMogqLqOX$|ioF_xFF%)>sx zh19lr*Oe7(xfa(;_hNb;QtTbAQ9t-onS>jJ4^)REtNbL{D+6yncxa@><+yuoV$+-E zl#I+|l19t#k5v#}a`Ge(>u5@YmOZAehPd%bjhCDVa{4Up+M-udZ>Et>RB7})7AjY2 zkR24_=eC)UJSS)D5b!j20i}>#mm-g)!4KZ zGe+I>$;KJ_(Qza>@Er60oi*x3(l{8WzN*V>vk6% zE15Zuo35u4R>Ggg#HWwcT!>4Vx)O2<#*VOY?_)$D5!sVV^_ebD)g(iQO|TvhEaH)=(0*2O;;=NV7C*Hbx}R-RfDHj zM3GlkrO#`T9VJPH7xx5`U!>9HW%~<5l$j88z}$CedGE+rK}qzY8c}io+oFALRq6b! z4N~onJY>C_;dN@&6GVwyJRuVnY^*|RoILY6Q7YbuuTE%n=W?=8m+x*pU-EUW9`{y9~T6+8+|L#);f(lBjDq7vitfqlw@3N{p!>;vHgqlUm z%^!)D89!A=cC%|EkP}D-10TF8ln!QQi{3dua6r9u;cvSBaogS^9gO<3!g4ca{wYc@ zipQ(V>^^t5&3n3>=0LFufN>-1E45okxZtHZZHEtVA{D>4+wF-Q^Vz2{C6lXfNgo2| zj;A*2m|RZDj^hCn!H6%dv?71bLD+ZOe(LUWzs|$1 zt_utXF;Kz#66IeDd|}wac8x|HO>mysv!lA7J5oG~(()BOta&+X5~@x__Sufr54yTs z9U7hgyeQB){lY%d{5FU7x-2?m=GimLQmuC!yy1{Z%|#CARKU}M1tozp`_OO5_h{vQCp&G!KilSTo1PT1aTM%2G8QPhv}t~O?R-kq4tw49Kgri-)W(Ts!gmcm z(-5HpMe9Et&pr8XIo>NQK9AgWh6T#S}BSrS%pDD?4IMr{2 z?a%%q6nnDNR|b{XplXEV*M2+crY;nTul=Mv#18%^t>V9FrT^;u|41vGOml>3mt>xS z>EvcQ8tRl>b%bGL?7rR8LmdA%efEd=as|}ZwX_0F5A~M*zId~maYOT|Vz}Pf$cMe` z>?oy%iH;MeR*Ul?L$tMH$}8*rZW413I^{^f&Lh12r{drE)rJ9RFBt(Gz-O__u}J~~ zA}m?IpW90{EoS9&Q3l(Hy#@TZHVK~{?8)LVutNVkKl_w}wP9dUD+Zt^F41fQD+Cr~ zevFO^-V%LY4G-O@jp=J2+vxZ-)q$6Lt=jwT$E2a>)C&>STxY*(#Nrz>3Id;KRF;@u zkK1YZfSmXH+Te^~mhJ9|d{m3{42F}aC*Y*ypj>B|6Ug;R%me|N;rob+I(&~^WQ3v% z(Eh*O&I{X!$-+xP5&z~S+tB{Gnt0y)Ig-48k;er*8*_su^HGcfb%Wzgzl6gF^0n;JIQ)goU9UpO?->VefXbNV_1#r 
zsJcsF%VW0-jZ*E;oPy`7r=@Ow=wN6X`!$CjGp0v~-vUJzhqrX-Gs?tx2QBS&esJ=1 z^mAdFw@vD`w%s~f4RwBJea?FAGA+{*-9m)b=wwB}(cBF(!X#`)?WX_Hg9F^-CGKGL z)dpG)tgsSc&}`szfutg0m)>X_R;y`!5$owvCeiCjlrn|SPAh@aj|GObo8V{xkMY!f z!RL6ML&ZoXTqL_Gar?){2x!>Fv-g|ajI|3xlLLDlYJ8#qxFof89lQA?y5!cW;ohKV=#U(Dl~8J@@3uznt6Tq z@;}$6da?$qSzU~B{$llI{q&O_#!ZfIM+fGPZaDEpD5pBlq1oB|i|-8w2l+J{N&M;V z6`wY+iCjZy6)g7!IC#YVVsInwFjM(^f65a*Ud?bqV|MYl!REHu%@u6iz8ub+oI@d zlU%_`Emm{XTv^Z+L%{BdvGnFzKdT&Fx@OSvM;62L%@NA65|6iwt8&I#Qnx~%UzysF zJGvDpf9HOs5g;4!los^@ARbT%!kYS%$1nc^Ls-Vp literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html new file mode 100644 index 0000000000..9c138df718 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper-members.html @@ -0,0 +1,108 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + +
    ClearAccumulators typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    Epilogue typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmConfig typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmTileTraitsHelperA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GemmTileTraitsHelperB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    MultiplyAdd typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html new file mode 100644 index 0000000000..3ac649b17b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTraitsHelper.html @@ -0,0 +1,441 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef IgemmConfig< OutputTile_, ScalarD_, AccumulatorsPerThread_ > GemmConfig
     The IGEMM config. More...
     
    typedef IgemmTileTraitsHelperA< kLayoutA_, GemmConfigGemmTileTraitsHelperA
     The GEMM config for A. More...
     
    typedef IgemmTileTraitsHelperB< kLayoutB_, GemmConfigGemmTileTraitsHelperB
     The GEMM config for B. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The iterator to load A from global memory. More...
     
    typedef IgemmTransformerA< GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA >::Transformer GlobalTransformerA
     The default transformer for A. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerAGlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The iterator to load B from global memory. More...
     
    typedef IgemmTransformerB< GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB >::Transformer GlobalTransformerB
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerBGlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorA, Copy< typename SharedLoadIteratorA::Fragment > > SharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorB, Copy< typename SharedLoadIteratorB::Fragment > > SharedLoadStreamB
     The stream to load B from shared memory. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef ClearAccumulators< typename MultiplyAdd::ScalarCClearAccumulators
     The object to clear accumulators. More...
     
    typedef IgemmEpilogue< IgemmEpilogueTraits< GemmConfig, EpilogueFunctor_ > > Epilogue
     The epilogue. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ ClearAccumulators

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef ClearAccumulators<typename MultiplyAdd::ScalarC> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::ClearAccumulators
    +
    + +
    +
    + +

    ◆ Epilogue

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmEpilogue<IgemmEpilogueTraits<GemmConfig, EpilogueFunctor_> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::Epilogue
    +
    + +
    +
    + +

    ◆ GemmConfig

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmConfig<OutputTile_, ScalarD_, AccumulatorsPerThread_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmConfig
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTileTraitsHelperA<kLayoutA_, GemmConfig> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmTileTraitsHelperA
    +
    + +
    +
    + +

    ◆ GemmTileTraitsHelperB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTileTraitsHelperB<kLayoutB_, GemmConfig> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GemmTileTraitsHelperB
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTransformerA<GemmTileTraitsHelperA::kLayout, GlobalLoadIteratorA>::Transformer cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef IgemmTransformerB<GemmTileTraitsHelperB::kLayout, GlobalLoadIteratorB>::Transformer cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ MultiplyAdd

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef GemmConfig::MultiplyAdd cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::MultiplyAdd
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits, typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits, typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA, Copy<typename SharedLoadIteratorA::Fragment> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB, Copy<typename SharedLoadIteratorB::Fragment> > cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits, typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<MatrixLayout::Kind kLayoutA_, MatrixLayout::Kind kLayoutB_, typename OutputTile_ , typename ScalarD_ , typename EpilogueFunctor_ , typename AccumulatorsPerThread_ = Shape<32, 8, 8>, typename Index_ = int>
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits, typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::IgemmTraitsHelper< kLayoutA_, kLayoutB_, OutputTile_, ScalarD_, EpilogueFunctor_, AccumulatorsPerThread_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html new file mode 100644 index 0000000000..73e523bb0f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..b31cf3bbc9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..7135e2be7b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef IgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef IgemmSwizzle<Iterator_> cutlass::gemm::IgemmTransformerA< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..e8d627ef06 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..3c826b8ad1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerA_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef Copy< typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Copy<typename Iterator_::Fragment> cutlass::gemm::IgemmTransformerA< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html new file mode 100644 index 0000000000..fa606b244c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< kLayout_, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..ed73de85fd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..61c70cf98c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kColumnMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef Copy< typename Iterator_::Fragment > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef Copy<typename Iterator_::Fragment> cutlass::gemm::IgemmTransformerB< MatrixLayout::kColumnMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html new file mode 100644 index 0000000000..709672d2ff --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html new file mode 100644 index 0000000000..836dae8a0f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1IgemmTransformerB_3_01MatrixLayout_1_1kRowMajor_00_01Iterator___01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ > Struct Template Reference
    +
    +
    + +

    #include <igemm_traits.h>

    + + + + +

    +Public Types

    typedef IgemmSwizzle< Iterator_ > Transformer
     
    +

    Member Typedef Documentation

    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ >
    + + + + +
    typedef IgemmSwizzle<Iterator_> cutlass::gemm::IgemmTransformerB< MatrixLayout::kRowMajor, Iterator_ >::Transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html new file mode 100644 index 0000000000..165525471f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling-members.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html new file mode 100644 index 0000000000..d79eb6f660 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling.html @@ -0,0 +1,319 @@ + + + + + + + +Cutlass: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ > Struct Template Reference
    +
    +
    + +

    Functor to compute linear combination of fragments. +

    + +

    #include <linear_scaling.h>

    + + + + + +

    +Classes

    struct  Params
     The parameters. More...
     
    + + + + + +

    +Public Types

    typedef Scalar_ Scalar
     
    typedef FragmentMultiplyAdd_ FragmentMultiplyAdd
     
    + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE LinearScaling (Params const &params)
     Ctor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void evaluate (Fragment_ const &accum, Fragment_ &output)
     Evaluate the functor. More...
     
    template<typename Fragment_ >
    CUTLASS_DEVICE void evaluate (Fragment_ const &accum, Fragment_ const &old, Fragment_ &output)
     Evaluate the functor. More...
     
    + + + + + + +

    +Public Attributes

    Scalar alpha
     The alpha/beta scaling factors. More...
     
    Scalar beta
     
    +

    Member Typedef Documentation

    + +

    ◆ FragmentMultiplyAdd

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    typedef FragmentMultiplyAdd_ cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::FragmentMultiplyAdd
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    typedef Scalar_ cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Scalar
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ LinearScaling()

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::LinearScaling (Params const & params)
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ evaluate() [1/2]

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (Fragment_ const & accum,
    Fragment_ & output 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ evaluate() [2/2]

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::evaluate (Fragment_ const & accum,
    Fragment_ const & old,
    Fragment_ & output 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::beta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html new file mode 100644 index 0000000000..4342065c6c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params-members.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html new file mode 100644 index 0000000000..605e0b8f90 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1LinearScaling_1_1Params.html @@ -0,0 +1,179 @@ + + + + + + + +Cutlass: cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params Struct Reference
    +
    +
    + +

    The parameters. +

    + +

    #include <linear_scaling.h>

    + + + + + + +

    +Public Member Functions

    template<typename GemmDesc_ >
    CUTLASS_HOST_DEVICE int initialize (GemmDesc_ const &desc)
     Initialize the parameters. More...
     
    + + + + + + +

    +Public Attributes

    Scalar alpha
     The alpha/beta scaling params. More...
     
    Scalar beta
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    +
    +template<typename GemmDesc_ >
    + + + + + +
    + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::initialize (GemmDesc_ const & desc)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ alpha

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::alpha
    +
    + +
    +
    + +

    ◆ beta

    + +
    +
    +
    +template<typename Scalar_, typename FragmentMultiplyAdd_ = FragmentMultiplyAdd<Scalar_>>
    + + + + +
    Scalar cutlass::gemm::LinearScaling< Scalar_, FragmentMultiplyAdd_ >::Params::beta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html new file mode 100644 index 0000000000..6b5a395835 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand.html @@ -0,0 +1,97 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< operand, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< operand, Kstrided > Struct Template Reference
    +
    +
    + +

    #include <gemm_operand.h>

    +

    Detailed Description

    +

    template<GemmOperand::Kind operand, bool Kstrided = true>
    +struct cutlass::gemm::ProjectOperand< operand, Kstrided >

    + +

    Projects a coordinate (K, N, M) onto inner and outer dimensions defined for each operand.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html new file mode 100644 index 0000000000..cbf6186dbd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord) | cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > | inline, static
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html new file mode 100644 index 0000000000..50d9533532 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kA_00_01Kstrided_01_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided > Struct Template Reference
    +
    +
    + +

    Project A operand - (0, K, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    +
    +template<bool Kstrided>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kA, Kstrided >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html new file mode 100644 index 0000000000..ab297d4803 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html new file mode 100644 index 0000000000..7021dbf8b2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kB_00_01Kstrided_01_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided > Struct Template Reference
    +
    +
    + +

    Project B operand - (0, K, N) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    +
    +template<bool Kstrided>
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kB, Kstrided >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html new file mode 100644 index 0000000000..79bf952fc0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kC, true >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kC, true >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html new file mode 100644 index 0000000000..8e2882f93d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kC_00_01true_01_4.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kC, true > Struct Template Reference
    +
    +
    + +

    Project C operand - (0, N, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kC, true >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html new file mode 100644 index 0000000000..d20c5c9f66 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ProjectOperand< GemmOperand::kD, true >, including all inherited members.

    + + +
    project(Coord< 3 > const &coord)cutlass::gemm::ProjectOperand< GemmOperand::kD, true >inlinestatic
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html new file mode 100644 index 0000000000..5cf621e383 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ProjectOperand_3_01GemmOperand_1_1kD_00_01true_01_4.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ProjectOperand< GemmOperand::kD, true > Struct Template Reference
    +
    +
    + +

    Project D operand - (0, N, M) +

    + +

    #include <gemm_operand.h>

    + + + + +

    +Static Public Member Functions

    static CUTLASS_HOST_DEVICE Coord< 3 > project (Coord< 3 > const &coord)
     
    +

    Member Function Documentation

    + +

    ◆ project()

    + +
    +
    + + + + + +
    + + + + + + + + +
    static CUTLASS_HOST_DEVICE Coord<3> cutlass::gemm::ProjectOperand< GemmOperand::kD, true >::project (Coord< 3 > const & coord)
    +
    +inlinestatic
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html new file mode 100644 index 0000000000..9d8d3d8a62 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >, including all inherited members.

    + + +
    Threads typedefcutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html new file mode 100644 index 0000000000..e4f6d4b50d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Types

    typedef Threads_ Threads
     
    +

    Member Typedef Documentation

    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Tile_, typename Threads_, bool = (Tile_::kW < Threads_::kW)>
    + + + + +
    typedef Threads_ cutlass::gemm::ReshapeThreads< Tile_, Threads_, bool >::Threads
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html new file mode 100644 index 0000000000..722512f71b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >, including all inherited members.

    + + +
    Threads typedefcutlass::gemm::ReshapeThreads< Tile_, Threads_, true >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html new file mode 100644 index 0000000000..ccfacd4f37 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ReshapeThreads_3_01Tile___00_01Threads___00_01true_01_4.html @@ -0,0 +1,118 @@ + + + + + + + +Cutlass: cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ReshapeThreads< Tile_, Threads_, true > Struct Template Reference
    +
    +
    + +

    #include <gemm_global_tile.h>

    + + + + +

    +Public Types

    typedef Shape< Threads_::kD, Threads_::kH *Threads_::kW/Tile_::kW, Tile_::kW, 1 > Threads
     
    +

    Member Typedef Documentation

    + +

    ◆ Threads

    + +
    +
    +
    +template<typename Tile_ , typename Threads_ >
    + + + + +
    typedef Shape<Threads_::kD, Threads_::kH * Threads_::kW / Tile_::kW, Tile_::kW, 1> cutlass::gemm::ReshapeThreads< Tile_, Threads_, true >::Threads
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html new file mode 100644 index 0000000000..1cd9d1933b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig-members.html @@ -0,0 +1,115 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    AccumulatorsPerWarp typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    InstructionShape typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    kAccumulatorsPerLdsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kAccumulatorsPerLdsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdgCcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerLdsDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStgDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsAcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsBcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kScalarsPerStsDcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kStagescutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kThreadscutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    kWarpSizecutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >static
    MultiplyAdd typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    OutputTile typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarA typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarB typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarC typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    ScalarD typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    Warps typedefcutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html new file mode 100644 index 0000000000..41649897f3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.html @@ -0,0 +1,177 @@ + + + + + + + +Cutlass: cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ > Struct Template Reference
    +
    +
    + +

    #include <sgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SgemmConfig< OutputTile_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_ >:
    +
    +
    + + +cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    typedef float ScalarA
     The scalar for A. More...
     
    typedef float ScalarB
     The scalar for B. More...
     
    typedef float ScalarC
     The scalar for C. More...
     
    typedef float ScalarD
     The scalar for D. More...
     
    typedef OutputTile_ OutputTile
     The tile. More...
     
    typedef ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float > MultiplyAdd
     The functor to do D = A*B + C. More...
     
    typedef MultiplyAdd::InstructionShape InstructionShape
     The shape of the instruction. More...
     
    typedef MultiplyAdd::AccumulatorsPerWarp AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef MultiplyAdd::Accumulators Accumulators
     The accumulators. More...
     
    typedef ShapeDiv< OutputTile, AccumulatorsPerWarp >::Shape Warps
     The number of warps. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmConfig< float, float, float, float, OutputTile_, ThreadMultiplyAdd< AccumulatorsPerThread_, Shape< 1, 4, 8 >, float, float, float >, kScalarsPerLdgA_, kScalarsPerLdgA_, 4, kScalarsPerLdgB_, kScalarsPerLdgB_, 4, 1, 4, 1, 2 >
    static int const kWarpSize
     The default warp size (32 threads per warp). More...
     
    static int const kThreads
     The numnber of threads. More...
     
    static int const kScalarsPerLdgA
     The number of scalars per LDG/STS/LDS for A. More...
     
    static int const kScalarsPerStsA
     
    static int const kScalarsPerLdsA
     
    static int const kScalarsPerLdgB
     The number of scalars per LDG/STS/LDS for B. More...
     
    static int const kScalarsPerStsB
     
    static int const kScalarsPerLdsB
     
    static int const kScalarsPerLdgC
     The number of scalars per LDG for C. More...
     
    static int const kScalarsPerStgD
     The number of scalars per STS/LDS/STG for D. More...
     
    static int const kScalarsPerStsD
     
    static int const kScalarsPerLdsD
     
    static int const kAccumulatorsPerLdsA
     The number of accumulators that are going to be fed from one LDS A/B. More...
     
    static int const kAccumulatorsPerLdsB
     
    static int const kStages
     The number of stages in shared memory to implement double, triple, more-buffering. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmConfig.png new file mode 100644 index 0000000000000000000000000000000000000000..5690d0823d0e5ece0d2dd47cbf5e1ded0b5c4b82 GIT binary patch literal 2930 zcmds(X;4#H7RN)QjiAWPfFRPrwmX7?fCgk2#0D`c7?4dNK@?>$?1qGhfNNC3(j>q` z5KvLjK?op-B#^K)Fmz-M5Vo)<5JLzgghfJ_*!I-aR87tF=ebqy-rxV+^WnUA|L@d& z40m-_RoSBg0)bRvrySiupsj$M;oB7Dv->)At9;oFKkMPNxw$FF1<7n-WAx8*`t7N& zulFgB@si7JXm{81AbHZ4(f6wd6a?C?0CRNkh*1!*1;j~P@`DLOL(k5&y`p(Me&BIa z>(cQoY;miF`lFDxib^TVw)4i3PO;xdv8|TawzK)GytR!BIY@kYiueO8Q4Z6f(*WsT z%JPpZ3>s&h3|ZU7Wcd^4Oy9454h|L5o;#BsE zo2C6ZU!`+T1u3L3by)V|0*&}`DyXEjfSTY6gHD5^E7szC1gYGrqa#)e_Ym_wb>H7l zn>eCdy?x9uF_a2vO9VcNf*26*I9mf|h?9G6=DhB=Kzmo%rO2M=48CN|B0T;CC5ztq z%#CV(p?FSW@DAH28<56WqZTG5#M>u<1Wj6A*JO1h?&w}_nQ8D-qcbLis)_2yK8=)2 z)H=5zZa=RjiX@4QK2JJyg$YmnB(nJw7)Ld42vOfWg(R=>&SCt4F+o%W$V6oVZ%iMY zGS{jmL{K4pWLf8WFC%*}pE#F%--dAqh< zA2}#`#Ztl`ZiOEmuv)NZav;pazNmFdtx?VMS08@Ke|wTTZA5iOSB=ePF8`ZpCPOd37Fp>gwAxCDee?7JmqUpZMYHN-nyIt(ZRo=b0acZbfC7Y0TC8;m97`D2w^3RuOqmI z#5#)d9P8RS%OIp?V^4tGv6$3*?leR?mYzo+>*aVBs9r@6w<{t>-^Drt549T2uL_UG zxC5lXU%cp;%OA*$Ad=n8x|JVTUhJrCiF;ROOP;aXx(&=GJ4ujV&&{4E=QO}l^k_8K z{&`KS-9gejwe$RD%N!xb%JMdVUy!-jm(9dF)gS{U0#Ox{lRw^CQbX+KtEQjL=?)VW zvg^F0kHvfB?vE<61_F(!L*1O`=4uiY>9))XYs&!c7+aK=-1v|$IC!B&ZXGo$c~$<} zFLi91%Q_~wiGza2@5j6fO-f03sD;MgvgUuph4uz|qHL;1sED0A$20cTCNS0J){t(a zJ)FYpr4(t&oV~Ck?WvPQB!KcOYB+IFK@wkzpo-%vVub3rMd)9vuJyC%Zt?y>Wn0*K zsEj(#NOT;fC!_Q`Ycjy0yw(h`IfkYU<<%6%`)L&$Va|~Ks;pLzgz}%8m-*iUwId(-D|78_Dd!`SzIfvBebjbZDAPvQ!U;=0 z;4NhF!_Zc+yntI+NZZ)7n+aBcL@jtgWtX`agZ(fWI7}%%!52mF2lon9M+YDU9Q%6C z`cIalQ{i;|;MROz6qX5TI}_8$XsD3Fvx;&SXLh<0=?$N~T!{$}l*Efm4O06wjwiPE z1%-&7asY(eCpL28(Vg^kj(smjxImn4u5CR-lx7x8hN$SlWLj(aY#*W&uSl6v+#}l9 z_gw`~p7(Ff7f3hOhowQBh#D|=NL%C|SGRbg|J>cTz>h0zLN^^0wra@fA)K<~D=dyB z?Eff_Y}JU+c*Xb%yZtNt8o%x(U%@VmJhir6-z;vU0nvi@rwd&CT?Vq^+HE)4>^Ajb zzq1cX9kQ|{y=?;ll@c8st`~A*PY00OVZCC`e2A2?@f>x^dAk7 
zU6Xii>-3mBhM?_L4jJs@=!>9GHms{7(|<-ZVJ)xi1zOvV5d7JWWIEPaRKyJT-!#hC zn&NKoYGQq&xnTaamgdRzcHWgIbpqMdTvD5X%tvX<0krX9`u$*$#SKO^%33nb1kpi+zx^J-Xp}lc>^hru=JwC>Th4>4m_5L&+%XxX#WyTM+3DSg;yNlpVgM!#6zui*v<3 zLx%)B5!|Rd<;J~A<&n`R*jyoDTEW;t_e{pbJNenBzTnlD>y~U;YW+3vusP+{@XhbX z>o!ueLcGG75MCqlX00i}%@e!g_Rf0Hg~$dx#dZjs+5OfsX@1{K^8HfiI}*X0;)grr zi9aXzp&fxYV&I=YgR!m}lnlShFa}d%`CB981QHX0-v;pPN55=qAdzi`& zh6mZ}cFM&9V{NN9?Ww^z*>mSNL@}}`v(aNf_T~KnEt>yZkm6&TnaiS1> zr$n?G4``*r>%TOM&BL7%mXNYoKIjRLASjxZ2kw@EPXo_y%YUtL`33WT0}YMKE_{ + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html new file mode 100644 index 0000000000..87da28431b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.html @@ -0,0 +1,173 @@ + + + + + + + +Cutlass: cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ > Struct Template Reference
    +
    +
    + +

    #include <sgemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SgemmTraits< kLayoutA_, kLayoutB_, OutputTile_, EpilogueFunctor_, AccumulatorsPerThread_, kScalarsPerLdgA_, kScalarsPerLdgB_, Index_, GemmConfig_, GemmEpilogueTraits_ >:
    +
    +
    + + +cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, GemmEpilogue< GemmEpilogueTraits_ >, Index_ > +cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef GemmEpilogue< GemmEpilogueTraits_ > Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::GlobalLoadStreamB, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamA, SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA< kLayoutA_, GemmConfig_ >, GemmTileTraitsHelperB< kLayoutB_, GemmConfig_ >, Index_ > ::SharedLoadStreamB, GemmEpilogue< GemmEpilogueTraits_ >, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SgemmTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..3e5427bc9d67ae9948cae9f0bbc9a566b4afedc8 GIT binary patch literal 8487 zcmeHNeO!|F+PBl$TCZ+-%2p}cY0FhAm8YzHVOyDJEoEg&CPJ>2OG^?&1O(RUZHI-0=I|pZosZ z*YA40zt;sPKZ%W=J!9bvFE6jzAN_UHb}uiV5YPO}*QR>Dx9mIonP*t=$<`fFAP9P< zqo$#&ckpLCvmZXUZr$2>BPYRg`dZfZ*f=lGP2(hL-Lcxs>#ds~ZTetG&J>MAiS2p+ z=CRIW-ti<_)Ak+M3vW6yN_~A6vgIFu_!8xMC9wqe`v;r%p@bOGj+Y%SQ*7$X zuf~Ybx~zZq@^X1nsr}s+Ccr`D@^h2qrUKb?bJ}Srz|WM)BP?2nTTSl)D2}GH6mT@x zwd9=Z_EPReTe2jF*l&K!sh3t*Q|DKcg28Gc`As>lD_iFVA6%C_(Z@r^{>+ZN_zbpM zTYdg|u#kQ)IT*@h3PpQuGqJYz8A18X){hmcO6T_FwD;x_)HSkbru?`Gi>QF%rQkH>lt577tQ0eB(~%D8h_(wmE44P7qi z>?{75)jPiPbjArz5{vk``|Kv0rkHQBmXi95XRwXYP4bqvP2X0bBWaE?O$`*&!G$eH zBSEuG)kr_wuuIpX7DK2z>~`vt?rBFPrTQf*i$GAl42UP?Sr_P{Q!B1MBb|bURw|dn zyWHKXxQ*OFVOR{xVzFe&-HFsyZ!n9LiUblK1iy)LK?-XS?FMgnPdfZEc>j8Y_Ye=K zgDrw`(GhP4FI6`Eu{WloeHTmQH6YzZSX&gjz+=8|v7&t0*;Y&TLrLJ% zCk;nYA40#8g?}w$6$t}@E~^4%ZS0rzrp0r4JB2-Xs`LY2VYcX;G95N(CmJdBZl@i) zJhBE>LM3({E>a2)Dl`0r;v+d!q6&vkbZ@=vG`FKJeE^+v-B-dPH4QiHx>q(&;KDw_ z@oL@WrDfg-_;s;5LL+yajzICMAgKLgH=Enu(#AC5B9k|d4DlLH8H7v78Dnd;xXZL` zCt^L>__d=t136-+yOD0vB88$yvZT*DP({~)d}94cam7GxvjO z4sJwNaE+U815h`aMQI)dpn}0;JSR5$2Nf7}TNT1`a_VN(XPxbX?~ctSO{aigxUIbn ze^)>kIgs<+o2;QVPswFj6#2GL1-3wYpi;*ysnbi0kAT0@;W|c7Vm)Q~V$tc)$M&Wy z6y_`oRnB;)wXJG|kYy${OJBpq=~pXDV667COkn3ZVSrfltigW1zJw)a9cMbMB%C08 zedOGcrn+89c)XZ;fK4v6Bz%W{^9i#Ea7)ledLVe;DhoeHEnj4@;~WcIPpX_XP?gC? 
z2du*|@Kn;%5-m4aNZa=qmDt4etHa$1$LsdeYrH*va=)p|`ndRd?y6HBwV#kHaIAK$mio==Z>+wL32V`aBEA#a8x~`Y7DDl zEu?fx=qSmP)vPBFagV;eqPFUmI0m@zz3(uIXkjO)1Li6S+2&{a4TFC`Mb#G$vOCyD zQzRu_VVjZ_@)4XoaIFaYra?pU^@#nxEQ5RlBAsFOQDXxrsE-?aX1TsS%b(k~Ru>uS zR)-7^FSaQ%!qI%`7YOgKi+V5t?$dF@9%GAQL0wzVo=dEfn>~9#SXg?f1sO4eBQzM+ z@xj14f1To-W;HK^W$%bV&o<}B;2*^d5Agy$$xEmJtC0d&ShVDPUWsr!v9w`_`_OXP z$4%<%4Xu}p+dT0-NZ8L=Jaep?Sy4FcfDz974z5Y=1y>u;w9U#7)R~LvV zCKcZDoFp)YAV@gtL>TRywFpHOCD3O}l2du1gOvzxq!de8Cqv0^t@(wT}P1gWh*+o|~AQTI$fY7eLlCdSa zWABf?4j(l9AZkUh5EH9lmP^`NpNm>N)xHsD+*Ke;6n1>puWSVPA`ac~c)0{KhFu~p zrBQ4SFy^5wX{=*}p*}0E zms~-T+T4n8lTwNzXTF#G$NHiH+U{-WKm&~^rQ)Bu!>Y0|pED#4!=1(dRW~GMyk+rE zx0HMhyI2>J&kctZc&w{10B3}rv}V}OBLxE?`MMek?aPV8KBk^F=D->Mdv0`cK@ktQ zbk`#={3s1q#s{{DB_0Psj$B7TQhkeS3x}8vXMnk-m_1U~!4zlTDGo5Up#ZHZprnu$ zb6zHWLZ43FOG5(^?Gd`i!G~LPa{J9c%g}(!{FkpGC%WZ-bk(9-Rw#zu2z9ciP?t~J zdPnG|KfD{hC#TR2tIG|c!L;jDmyuvrrz-9|dhwGed*rm*5ohr!>+W;#6%JJNU;JH^*79gBn}P4nu#Ym`Hmr_Y{}5 zH@9A~R)~4%SiT9r&HcV7_3aXs)8p&%OjnxNuXhynH{nJ&ZhR%oHMZwHXvr2x4+KHS zvZwlP(`IQ~Rt7KCV>bZs(Y*lm)9T1`??Lmm1VppQcN1B&pf+@B8 zx8pTHNF{jI=B&=h9o5R#KPGe-I4~XasiV6M?7JTCs#IV_enJJOBPkf+3JuIJ)9zf3 zph&m#R6y$VWPHoqI+d~DK|HvIrL(3JoL84w&-KIJkcUEtly_YGmPYjY;~s_|p{H4y zL60oK5E&@dI@=8#mcG3Wa?!5pl1|uX@$O?OdhdJ%G zhgXT`HK`2BFLS4Rcv%9!DT~BRPd3R6KE*tO&{9}x-wP~rLbV!M!(UlF5HN}1(K@rLMwJ@KETi7R@>E>~!g$*E z9(Dm9IU^62B2CkRzVTf0$k;V5<9-6W&(d}uG6!uE1mQ#-j{3QZe>=Q( ze|ht*TWH;sS6?9a6z`QU(5QFL7l&UU+O4TC@S5eE`kSQ#zxXxTru~{^u)hCn^*8#= zzf!BC>|!Ocomis0^!o?*)JySkYhHTL_+5!FzZz!}hZVnct>>}-;^h&{@hWtlrwY~G z=9C#0fI@|uE)zsZ6Yw2kVa*zpT4#a4R-Ug}PF;FeSRvWES+qhc&RG>LiXuWs+CHOX zr_c!2jzAPXUV9*)Q+n~IL+nYRlg?GV#j-6TsC9ln^@0u%G?-1y(2RNkzwSMPE~mVh zvx0HjjXi@11Krm=4A@TyjZ&7J!?61F!y0l#@w|#LwWBJHbM^V*CbB1=4zA1Na%H(L zr>3h$CTNHN+}Cn5T|LS&Z$^HJ3|bbx*2vE<+E<~Yt+6x2T$uCPSWtJAFRO^Ez1#FU zdX9C~Zc%l|A8nCmIO#fuD)uy_(II`nJ;uLe4GW>_<*Yeif0G9R$*=MxG;R!WQuCl~ z`1&NVK}(*lM5fJMui_z*FhRc1n;{km8m>L;#PRkI>U-iDs7>HDW#vrf+pL9dyXnRq z$=iZla)v7{Ez*DBGFgNp(smHi8ZKQ@adN{)R`}2&PC(f@@1rz~zGND~;o=lByWIB4 
zPGSNd{c;P+8mNHT0uj(iPHJ4)l`sYGS@Cke2qr=*kXkm6sr@B4o#mZEhHnI^N>sGX zz4Z(<0{Gm$uUWcS)Rl}ebL#>juQ|G}c*i>1#$1O-b%7`ovVv_`5+R^XZk)Ja_zh+M zu*p>w$d}tRbltadp{%9`!}KphKC)9?p3=e3a!oj{f;q-qM^7h9^8GAp||!rv1M) zP|_;bF}a9pf4~o)82q6>K5hdkSe#M<@cmr8Fod3(OHDuSdlY>_az5h(<=``PV55h%`$rW7*BLh774}(lKQlS|XU!xRK*+UJhYONj<}p{a zqGgw#WLd}CVRgad@O7U*OyKJx`@r+nweK-%VED1p=nfM`+fY}nddfc1ds2URTk=-1 z`dnOcKA|zP?fY#^#nm1rrS|h-59IZ@Kufm66+6&^ujQGvJK`txw?No0144(P5J9(H zQ=^j!&^Cji9Esw}^9?40P%VZfAGN(P7G1{OOv~k6cXr>?Wd=_(wTY?7S;`g-;Z>*n zsi+113h_FAk$mG7(hT!oRxxo|+;rUu*W+(`P6CDcve!jXwyERMNqF49Z>@ik0CjV6 zz(Z(1+s{>1NFE37;MH@eSG=>IY0@Aw4Phd({S(}QX?`>Gu#j_p4JQH8#>QVmN}89zvX`JZ(|>@~3uyJ#|G?Er zbp!2b=HiycEt&WKi2-(N$_r)4R2p9{g#8NE*W^LL7m9_IqcvdyXXA_YhD>WUc>l`7 z>wkKKxv~lxz!mf1drVwz%_U5^jECuhRSsAEN#@c^8LFpv4d@WGL|*0 z%Cs5U*BVe@;vf!bVqw9XXK`zBCOx};iQ{uQa~?OCUp>>k=^Quc0X=9ACI?3GfT`3e z(wUAYezrpAv5A_7MHsP8%w>bZlMeL>ZOv9=^ah2ztO+%OP@qRB_o1 zZm8MN8s}>Bq z(TN1do?>uQH(YlYJ^-E2rn6yB%f#IImE20YPqmngHEkz=*D+t42>}hrrDg7#Jfw?iv;A5LPVNsY)+xgtOkWteq*E5@@ zoq0rw&2$N;yEJF0?$I2L(J?DlBxm<8dm!dA=4m}r^e{L%!K~@n&WLcPvL33E zwm&rrqZo%0OEza8O4M#OM)lJ3WQiQ%y_P86r2^(zXn{^TCuasZVvTqL?&90j2}$> z+hR1>*K8UICs^#7Va4U>+CbUD+K@0V5MdsQP1of)&CmEzw08#;#S9@{a#k1>en?$M zs11bUoPW}j_D4Rl(mQi^`#Mi9m34=34?GlScT0-(t2?Fly^saoM_nU%xlo!Osm+@= zk^Cb2YRTq16Vc`H@95@Y(7Dh&C`~36B$FbK)5Z~I-&JG_1u$EZ{F8_kW99eg`&cBy zn5I&T-@{;X!;H4K5b?TLaoee6egr?<8-h@jE@Gx=PbskOjEvd~W9pP!)sW)aotm(N zvitqQ@0hDHK8>91xYxDR-svD$9jdND1-5l<&aKk>Ua{Q@w)%{FiMC?MG#`*Iz$n_f zk}WmDq(zn`DX@kmvUYSW))GC@zsM11>V{5C1fKVVws-i`fLuPuWYzRlB9G?IF>cl< zM-dv(>i&~nC{{)$KLP~CD33l;n(-lm(}_VtTyvc0zRYC+0dmHY;I04 zJPzxpzNsN^teZ|=^|jiAschs@^!&M%3g zJ2+QS7&qfsI%^I#pxJngUf01F)EU!OXU6~F(iwG`Cb?@(?S7>w7L5REhQy7uH_fDC z?xKcE%%VlXAIdpMxVdN#iLY1A;}=$2vTu8SBNWX?fA;+%k?dZPrz~vV3P8+VVFs6z zlHo)#CYl3%8}oXO_&)a()jEM+6&eLJ&^#4Gj|C&+=0==?K*N)jIp2DIV|~LYddh$@ zAV56zz?ehBEOrj2_tLelT35Rl6NZa{XH`op=dGX zVy)Wd$W+E@lyuob!G$j75@3H{WlN~g5gt~Pf(bKx`UVua7SCHrC>Z#(5ve=IE|MK^ 
z5~DD$>YIbDOJW3-sTB!o;=GImb=nC&GZaw_^ujt0=582a69xhd&t;%WMgPbuw1>($ z3K;1qZKbtG6ghvOd>3n>{GdXd;STb#M5jWik0IJ_{wQc$3X%)4<477D$~!vYY4EV@ zEfX=q{Nr~cpbnYG6c?f&tJlbb_* z7(%Y?d4x^k0Z97S + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >, including all inherited members.

    + + + + + + + + + + + + + + + + +
    commit(FetchedFragment &fetched, TransformedFragment &transformed)cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    copy(FetchedFragment &fetched)cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    copy(int d, FetchedFragment &fetched)cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    FetchedFragment typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    Fragment typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    inc_stage()cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    initialize(Params const &params, SharedStorage &shared_storage)cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    Iterator typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    iteratorcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    SharedLoadStream()cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    SharedLoadStream(Params const &params, SharedStorage &shared_storage)cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >inline
    SharedStorage typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    TransformedFragment typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    Transformer typedefcutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    transformercutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html new file mode 100644 index 0000000000..4e2badd0db --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream.html @@ -0,0 +1,526 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The load iterator. More...
     
    typedef Transformer_ Transformer
     The transformer. More...
     
    typedef Iterator::Fragment FetchedFragment
     The fragment that is copied from shared memory. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef TransformedFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Storage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE SharedLoadStream ()
     Ctor. More...
     
    CUTLASS_DEVICE SharedLoadStream (Params const &params, SharedStorage &shared_storage)
     Ctor. More...
     
    CUTLASS_DEVICE void initialize (Params const &params, SharedStorage &shared_storage)
     Initialize the stream. More...
     
    CUTLASS_DEVICE void copy (FetchedFragment &fetched)
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void copy (int d, FetchedFragment &fetched)
     Load the data from shared memory to the fetch fragment. More...
     
    CUTLASS_DEVICE void commit (FetchedFragment &fetched, TransformedFragment &transformed)
     Commit the data. More...
     
    CUTLASS_DEVICE void inc_stage ()
     Increment the stage. More...
     
    + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    Transformer transformer
     The transformer. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ FetchedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Fragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::FetchedFragment
    +
    + +
    +
    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef TransformedFragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The output fragment.

    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Storage cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedLoadStream() [1/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedLoadStream ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ SharedLoadStream() [2/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::SharedLoadStream (Params const & params,
    SharedStorage & shared_storage 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::commit (FetchedFragmentfetched,
    TransformedFragmenttransformed 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy() [1/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::copy (FetchedFragmentfetched)
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy() [2/2]

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::copy (int d,
    FetchedFragmentfetched 
    )
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_stage()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::inc_stage ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::initialize (Params const & params,
    SharedStorage & shared_storage 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html new file mode 100644 index 0000000000..f947377b9e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html new file mode 100644 index 0000000000..84b8e79d22 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedLoadStream_1_1Params.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize ()
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::SharedLoadStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html new file mode 100644 index 0000000000..6d9707a200 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html new file mode 100644 index 0000000000..ed5b6c407e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream.html @@ -0,0 +1,405 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Iterator_ Iterator
     The store iterator. More...
     
    typedef Transformer_ Transformer
     
    typedef Transformer::InputFragment InputFragment
     The input fragment. More...
     
    typedef Transformer::OutputFragment TransformedFragment
     The fragment that is obtained after the transformation by the transformer. More...
     
    typedef InputFragment Fragment
     Make sure the fragments match. More...
     
    typedef Iterator::Storage SharedStorage
     The storage in shared memory needed by that stream. More...
     
    + + + + + + + + + + + +

    +Public Member Functions

    template<typename Fragment_ >
    CUTLASS_DEVICE SharedStoreStream (Params const &params, SharedStorage &shared_storage, Fragment_ const &fragment, int offset=0)
     Ctor. More...
     
    CUTLASS_DEVICE void copy ()
     Trigger the copy from the fragment to shared memory. More...
     
    CUTLASS_DEVICE void commit ()
     Commit the data. More...
     
    + + + + + + + + + + +

    +Public Attributes

    Iterator iterator
     The iterator. More...
     
    Transformer transformer
     The transformer. More...
     
    TransformedFragment transformed_fragment
     The fragment containing the transformed data before the copy into shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef InputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Fragment
    +
    +

    The input fragment.

    + +
    +
    + +

    ◆ InputFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::InputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::InputFragment
    +
    + +
    +
    + +

    ◆ Iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator_ cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Iterator
    +
    + +
    +
    + +

    ◆ SharedStorage

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Iterator::Storage cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::SharedStorage
    +
    + +
    +
    + +

    ◆ TransformedFragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer::OutputFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::TransformedFragment
    +
    + +
    +
    + +

    ◆ Transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    typedef Transformer_ cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Transformer
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ SharedStoreStream()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    +
    +template<typename Fragment_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::SharedStoreStream (Params const & params,
    SharedStorage & shared_storage,
    Fragment_ const & fragment,
    int offset = 0 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ commit()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::commit ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ copy()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::copy ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::iterator
    +
    + +
    +
    + +

    ◆ transformed_fragment

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    TransformedFragment cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::transformed_fragment
    +
    + +
    +
    + +

    ◆ transformer

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Transformer cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::transformer
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html new file mode 100644 index 0000000000..6d6c5b2384 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html new file mode 100644 index 0000000000..78b0626674 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SharedStoreStream_1_1Params.html @@ -0,0 +1,157 @@ + + + + + + + +Cutlass: cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <gemm_shared_stream.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize ()
     Setup the params. More...
     
    + + + + +

    +Public Attributes

    Iterator::Params iterator
     The iterator params. More...
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params::initialize ()
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ iterator

    + +
    +
    +
    +template<typename Iterator_ , typename Transformer_ = Copy<typename Iterator_::Fragment>>
    + + + + +
    Iterator::Params cutlass::gemm::SharedStoreStream< Iterator_, Transformer_ >::Params::iterator
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html new file mode 100644 index 0000000000..43845f4d7b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits-members.html @@ -0,0 +1,106 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Delta typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Functor typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalLoadIteratorC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    GlobalTransformerD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Index typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Iterations typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    OutputTile typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    Scalar typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarC typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    ScalarD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedLoadIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreIteratorD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    SharedStoreTransformerD typedefcutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html new file mode 100644 index 0000000000..f389630603 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.html @@ -0,0 +1,155 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_epilogue_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SimplifiedGemmEpilogueTraits< GemmConfig_, EpilogueFunctor_, Index_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmEpilogueTraits< GemmConfig_::OutputTile, GemmConfig_::Accumulators, Helper_::GlobalLoadIteratorC, Helper_::GlobalTransformerC, Helper_::GlobalTransformerD, Helper_::GlobalStoreIteratorD, Helper_::SharedStoreIteratorD, Helper_::SharedStoreTransformerD, Helper_::SharedLoadIteratorD, Helper_::Iterations, Helper_::Delta, EpilogueFunctor_, Index_ >
    typedef GemmConfig_::OutputTile OutputTile
     The output tile. More...
     
    typedef GemmConfig_::Accumulators Accumulators
     
    typedef Helper_::GlobalLoadIteratorC GlobalLoadIteratorC
     The iterator for C in global memory. More...
     
    typedef Helper_::GlobalTransformerC GlobalTransformerC
     The transformer for C. More...
     
    typedef Helper_::GlobalTransformerD GlobalTransformerD
     The transformer for D. More...
     
    typedef Helper_::GlobalStoreIteratorD GlobalStoreIteratorD
     The iterator for D in global memory. More...
     
    typedef Helper_::SharedStoreIteratorD SharedStoreIteratorD
     The iterator to store D in shared memory. More...
     
    typedef Helper_::SharedStoreTransformerD SharedStoreTransformerD
     The shared store transformer for D. More...
     
    typedef Helper_::SharedLoadIteratorD SharedLoadIteratorD
     The iterator to load D from shared memory. More...
     
    typedef Helper_::Iterations Iterations
     typedef typename GemmConfig::EpilogueIterations Iterations; More...
     
    typedef Helper_::Delta Delta
     The iteration strides. More...
     
    typedef EpilogueFunctor_ Functor
     The functor in charge of the math. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef Functor::Scalar Scalar
     We do not support 3D or 4D shapes. More...
     
    typedef GlobalLoadIteratorC::Scalar ScalarC
     The scalar for C. More...
     
    typedef GlobalStoreIteratorD::Scalar ScalarD
     The scalar for D. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmEpilogueTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..eaded28cc2a2f85170d318c2f828742ea00d68e8 GIT binary patch literal 3650 zcmdT{c~p|=8nEq_RMf@{H^yAZlyFicL?9tTK;U9ibI;7Vo%!#cbAQkIzUTSg_c_n|p67Yq-}im@ z+})fs*XgWNQ&ZCfo;h`1O>J$g%64D7QguJ`$ka-uwB21i9hFL@%6u<+hv`aqpt3$) z9UUDP>JzuIivHv~L&!ZUFNm(GcuXx;Q}$84Q5U-_b^KH)d7 z{F7|JHFL{}bEh_t)K<&~PFrVNWKUoCn(EQcd4&A4%-kI+)X)2iIAuQ9fBF|We&v`U z(BRKP*;sA{Jn$FE*3JSp97_QMV_&>q$kB(V($P(emUBL`q!Rg^_KbGg+i4P=wGCAL z8ey9}c4#HuoX#Wj(i|Af?!bO&uHsK7jS1+ z!+TC%uu8i6_)t5-l?d~Yl*B1=#`?=wCU>g~keU2dGTs719bao#67rUSB&K{vnUV6w z_3@m}y~lKNg4s8KXOS@}&}%R|BT7I19A0PAHv}e|h+c^5%2_?371&N65Ow>?E3JTn z2Tlc&7?e&`bK}KathMQw#|KQXUP-w=f#noJ;;~Tcf0KB?eF5N~cydq0T&c3uLO6LH z8O*-7dB$sp1JvJu(se_fsHb2fr3e#eWR$LxJ95%W=ZKaDdACHs5wa?%e0v2!-gc~P zQn_R+t2+=4;6^t~Hl^%Ncsaks6a~nM32YIKJUm~PY0qUZNe;e+J|!`e$||~l8Gum@ z_*S6_sfjjm_{`Z|pd6x{Fv$9dX@SCa&&~fl&^a*n-1af&AR3l17`v|`%SDVE?aQJ2 zMi0 z<}S5a+8G`R01~?dVJm&qZKH^f#ZJKXQP?MvB}WHjS-vJFiXv$10+@O*MDZMRFeT;* zX}Ixjdv*(9GM}F&&xSAzi~M7pRUM#7`A}?xE9B^vaMS8G99Ff+^!>xPZnD811YHrf zUqbags-^WV!RW<;m!jijVMRis2iF82awDnbksF8k=0@8KNd7C67h}iMqLO6FtEG>L zUb;926*?3++FxU3MiJ1cgH^AkkfMI`cT0AN?Fg8)A3xR}=qG}w3i#N&J`fAQb|{z` zelM!j7-&eNkqAdeP_~xgeWxuhBG7yA5o2)?#xfvWf_0+=Ws^ognz0RnQL^qf^~ zZlK9ByPkxo9VVw7)$!RMz5S`9g)P4KK~{v#k8p17vt3Ab^NV1cMn6FC>+ytOh7-e= zDO^;Q*y5HKG{+VXl}yJ8QoQ?wmswte3UZq!%&L2A8qgfSG3emiv2Oxvh=4WE;|*FZ z#^oy|C7x-~xT;_VaYLEQyem7^gF8l&@hQvgN!5Her7oz<)GPdXbO;uoj$=G)omQn! 
zbYm3Hh!SvX9jYO)i%qAeMSa+OG$=(bU*cVyu~&kLZFJCW6i2$0N}{5vhaf1BdC{F> z-h()PL1%O+oN-F|W16lkwjRxjwjt1rFv6z7mTum01|rBi=ldY(ltFp-SyCGf8&=}k z?ThIH$6&+kFwFI6`vmxm?Fp9dFyH8DZS3pb50;L6S16niuzrp!6ODiLXl!dmiy`oU zwHzzO;`u~ZVu7cjmfIS0z^!GLc=(AWuBEr?Ps2MychNCI_ZA@6_Ygi}<6;&vIqT?n zJ-(#xCahTQ9bnS~ZUV)UP6r{EZqpCv%mCqtHuKX+Th335_nC`<{ocr(unMtK%sGQ3 z=;cK=x*}nc%WTD~(wtx#RrlAl(uIjG6Le|>&~Ais)dguJUnk-1mCk7c1)12zXEYFU zKGC?RKyzlO8aacvwHmQ9MCvmn=O~sF`bhk4_#4zkH)In; zPv`dKCzrKve@$6GSNlDst5!K_=#l?OUdQ`|^bmd-y9}s4ok{U?fFijZ;fF-Zue7bZ!i(TfU{tO(N5{G_mv%7T1`v)o%aK z52x@N^fT`6b>XHa3Gwo%n!}_SQvm; zC!Q{ID(C8bF&-K<*VITJjlZ)k}|=wFG3C7W7&VT&)tvQ6Po> zxhRa_N_Mx=l{UG7EMl1opD{>P=q>Z6*cbN>2XVh=qyl$N$2@P4b zWy{FvbN*fxeqP{XZw@mu8b11>R$~e}xvwPtq&)rp zy;oxGPy&Md`9M->OaMvuk9N%K6oNBMY0k-5am4i`1HYA_I_;siW~vMkygV!!%?QYN$6}l{AkrbP z2&r^11D$5Uo_)zSm`mlO9WvUtq56r1Bco^e1zp5*>EwH}Ys8!0+$uC^F;#tp; lKiJjus}tIPLmIPbVY@2J^Ck{|)d&R|o(A literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html new file mode 100644 index 0000000000..eb82c01569 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits-members.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + +
    BlockSwizzle typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ClearAccumulators typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Epilogue typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GemmConfig typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    GlobalLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    Index typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    kLayoutAcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    kLayoutBcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >static
    MultiplyAdd typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    OutputTile typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarC typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    ScalarD typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    shared_load_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    shared_store_fence(bool in_loop)cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >inlinestatic
    SharedLoadStreamA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedLoadStreamB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageA typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    SharedStoreStorageB typedefcutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html new file mode 100644 index 0000000000..9a2328d5ed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.html @@ -0,0 +1,172 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    +
    +Inheritance diagram for cutlass::gemm::SimplifiedGemmTraits< kLayoutA_, kLayoutB_, GemmConfig_, Epilogue_, Index_, GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Helper_ >:
    +
    +
    + + +cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > > + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    typedef GemmConfig_ GemmConfig
     The configuration. More...
     
    typedef GemmConfig::OutputTile OutputTile
     The output tile. More...
     
    typedef Helper_::GlobalLoadStreamA GlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamA ::Scalar ScalarA
     The scalar for A. More...
     
    typedef Helper_::GlobalLoadStreamB GlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef Helper_::GlobalLoadStreamB ::Scalar ScalarB
     The scalar for B. More...
     
    typedef Helper_::SharedLoadStreamA SharedLoadStreamA
     The iterator for A to load from shared memory. More...
     
    typedef Helper_::SharedLoadStreamB SharedLoadStreamB
     The iterator for B to load from shared memory. More...
     
    typedef GlobalLoadStreamA::SharedStoreStorage SharedStoreStorageA
     The shared storage for A. More...
     
    typedef GlobalLoadStreamB::SharedStoreStorage SharedStoreStorageB
     The shared storage for B. More...
     
    typedef GemmConfig::MultiplyAdd MultiplyAdd
     The multiply-add functor. More...
     
    typedef Epilogue_ Epilogue
     The epilogue. More...
     
    typedef Epilogue::ScalarC ScalarC
     The scalars in the epilogue. More...
     
    typedef Epilogue::ScalarD ScalarD
     
    typedef IdentityBlockSwizzle BlockSwizzle
     The block swizzle to reorganize the grid. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef ClearAccumulators< GemmConfig_::Accumulators::Element > ClearAccumulators
     Clear the accumulators. More...
     
    - Static Public Member Functions inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static CUTLASS_DEVICE void shared_load_fence (bool in_loop)
     The memory fence for shared loads. More...
     
    static CUTLASS_DEVICE void shared_store_fence (bool in_loop)
     The memory fence for shared stores. More...
     
    - Static Public Attributes inherited from cutlass::gemm::GemmTraits< GemmConfig_, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Epilogue_, IdentityBlockSwizzle, Index_, ClearAccumulators< GemmConfig_::Accumulators::Element > >
    static MatrixLayout::Kind const kLayoutA
     The layout of A. More...
     
    static MatrixLayout::Kind const kLayoutB
     The layout of B. More...
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..3686ced6985fa93050f7e1c035276f69e0fc9243 GIT binary patch literal 3188 zcmdT_dpwhEAHNwE(Nan(IYdID8gd#HV>CoVL&lp^LOGunQz}$6ry}%Z<%t|KDyJfc z9Ma=3bEceUwwi5NYwviUp68F}ulN1$y*{7&{{61&`rhB``u(o^`u-Aat<5FG_KE=j zAc3$jwF3YVFyFV`0^+~LDp6Fv*<*Y5ycv(j|5Cur_N_Ps{;rY?;yC`2(npJ4fDGmv`~#(f2spm= zJxrc&{xc2T#uE4wg?~V}Ru1`vx~p{WOdGh@CXJ9~Q6q;~q|<_j3XB=2$^%kHYJa}w zWKlUUP(6$lI8~YE?V{&`M@t!%788eCy$N~QiujRDSD5LdzLsO&_*+NGJ zvJD!u+7i>`h=+g4)N*Xow6W|HwM@tXqMyQ zp_=^dqfA`ncjjH`-eq&%cu3OvfYD4Gy$67@{gEC_GIkqHQxrJrGoTYjcK+6@MpT)(LlFluynN&P=GNmjztoh|~Qdw>#x{(R7 z$z;QS8Mr6iS7@8kW8ltCu@nQSrM^`iv4tPzybaut4-Iu2xR`+kC?thI4p886tI{S8 zNM-cDcP0`~ic)vFlr)y2vIg`iQHaZW8HDtm>V3jN9K`!vp~0G|AMa`5JvX{f40e80 zv!XeK$2@TxRQ4>H0$n)1&%idbf@BR9V}KXCom(>JFe+!gDv&BY{J9z8dUsQAT1E175+@ z`~7D1HuoOUFzgd@-)zT7VWHaT`s>bA=#5j*=r{5zf-Xznk57FI@95l z?&uZ=nC;dQi2I1Jn4D7#LAAgsGC&)Vq;bfn(X9xq7=?CAkFs^xij;1rwh8NEvJ(s@deF{5gW169fpmM%)AWh67Fm612@*$=DcKfAA$|;UJq=lbWCng z!!dJRqHihPE_@!>g;BfKc|0ke=->BJOG*yW5#e3`;{F)Lqxt>e^UPRo!5v z1xeOd%ibBE)i|YHE&=Xt1ZGa}dPcm}Iim-z-ykm+yG0|8j7n!pBRW=}Y8%dha&}tv zI$q6+sp*;T$EVxJI>QmYb8Ctv6X~QfDVOfC9ertYzG(zB)s_8)VkhCwi0dx@ISg874qlQ6IuTbJDOrR9=SN;}JzQ(%U!kYXgzrOpI zK$|MODZgJaa;cjtydl4P!JN3iMf&7u4CWs~-UUVWZ2|TGAT33n0N-E$@i>7!f*c@D zKnoCg2Z%Sv@`ko-4D!1esU^_H;09b=cVD^WhR6Tnoje)*3!=0n_zS|9{>^DVA^0!C zyEm}FCIbfG7Ybl=JuQI0ZD4b~J#qXR@r(B4-?aQ7|Bq?4j;KDnWM}!2kd8;FDyP|5 z9(tC3D^BRo;6*UNe-|CyI$$m4jAb8n?tpmv@O~akkGIMgG!%c*(%9pfk~hxj+-WsL zS+z(-qaTRJZKEDQ-CuO6EBC**0t(T4qkFFXkLwu=)K(gH^#roM8&G`LO%U*23D>fs0*)cD@a^*m5aaxdX$ZE^_{h&Yktt6 zOs{GP5OaV7yh|ojNliKH2lg1-7}ElH{*76mrZ_#pEZeaLvS;2b%<32t*02w|#-+9N zE~q#~Zd*R+px~a!jAzG8hNNk6~mp3JMtMaZ`!BN^6mWkp5aNUX^f`Tb4ZAJs-uC=ez&|c7{Az(g(^Z2bbGcdFsaS|hsq?6 zo~iL^Gk~%c|L`$@rOH=(WHZsr=Csi%s^D#ogA5YYq>CK$<2t31d<`$6EF6vXOs-J$ 
zcYbiVGpvm-;soXEGjfM=qEK9JN0C=XSNp3+TolqA_F2|n8i-I~3B*Yw*smfuSYv(~ zi9d*s2WE~0PAG&9H}){uKTum<3AuVv;XO%B^^BmtB2K8WY;vmd)$sR25)xiPu=dg7 zyVowG{L1U}TkbkCd9cG)eh+%CjJFOa$;?K)VVOHfHh&6UUUod(?)>>-Vl~9&l)d+> zo_iu%0IdlI(B1Q_)|7P?33s>wbMbY-_1%iEjgjH&Z6VmG!B1V`r;rA&E>D2#GW>%C zuVpRMm^hHc!cPWX;&HOTReW@7t-CX$N@wAiYoMF(I~kVPA`_bX;5@%r2<(Z=s^DKE zd?AD`th1=G1Lk%s?8NvFRy}Gq>8)Gew4gA(^MBLBsaeoJ`a literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html new file mode 100644 index 0000000000..65e20abd33 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + +
    GlobalLoadIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadStreamA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalLoadStreamB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalTransformerA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    GlobalTransformerB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadStreamA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedLoadStreamB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedStoreIteratorA typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    SharedStoreIteratorB typedefcutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html new file mode 100644 index 0000000000..6b3b049412 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1SimplifiedGemmTraitsHelper.html @@ -0,0 +1,328 @@ + + + + + + + +Cutlass: cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_ > GlobalLoadIteratorA
     The global iterator to load A from global memory. More...
     
    typedef Copy< typename GlobalLoadIteratorA::FragmentGlobalTransformerA
     The data converter for A before storing to shared memory. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorA
     The iterator to store A to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerAGlobalLoadStreamA
     The stream to load A from global memory to shared memory. More...
     
    typedef GemmGlobalIteratorAb< typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_ > GlobalLoadIteratorB
     The global iterator to load B from global memory. More...
     
    typedef Copy< typename GlobalLoadIteratorB::FragmentGlobalTransformerB
     The data converter for B before storing to shared memory. More...
     
    typedef TileStoreIterator< typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedStoreIteratorB
     The iterator to store B to shared memory. More...
     
    typedef GlobalLoadStream< GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerBGlobalLoadStreamB
     The stream to load B from global memory to shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorA
     The iterator to load A from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorASharedLoadStreamA
     The stream to load A from shared memory. More...
     
    typedef TileLoadIterator< typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kSharedSharedLoadIteratorB
     The iterator to load B from shared memory. More...
     
    typedef SharedLoadStream< SharedLoadIteratorBSharedLoadStreamB
     The stream to load B from shared memory. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ GlobalLoadIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA_::GlobalTileTraits, Index_> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadIteratorA
    +
    + +
    +
    + +

    ◆ GlobalLoadIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB_::GlobalTileTraits, Index_> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadIteratorB
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadStreamA
    +
    + +
    +
    + +

    ◆ GlobalLoadStreamB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalLoadStreamB
    +
    + +
    +
    + +

    ◆ GlobalTransformerA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef Copy<typename GlobalLoadIteratorA::Fragment> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalTransformerA
    +
    + +
    +
    + +

    ◆ GlobalTransformerB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef Copy<typename GlobalLoadIteratorB::Fragment> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::GlobalTransformerB
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperA_::SharedLoadTileTraits, typename GemmTileTraitsHelperA_::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadIteratorA
    +
    + +
    +
    + +

    ◆ SharedLoadIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileLoadIterator<typename GemmTileTraitsHelperB_::SharedLoadTileTraits, typename GemmTileTraitsHelperB_::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadIteratorB
    +
    + +
    +
    + +

    ◆ SharedLoadStreamA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorA> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadStreamA
    +
    + +
    +
    + +

    ◆ SharedLoadStreamB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef SharedLoadStream<SharedLoadIteratorB> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedLoadStreamB
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorA

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperA_::SharedStoreTileTraits, typename GemmTileTraitsHelperA_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedStoreIteratorA
    +
    + +
    +
    + +

    ◆ SharedStoreIteratorB

    + +
    +
    +
    +template<typename GemmTileTraitsHelperA_ , typename GemmTileTraitsHelperB_ , typename Index_ >
    + + + + +
    typedef TileStoreIterator<typename GemmTileTraitsHelperB_::SharedStoreTileTraits, typename GemmTileTraitsHelperB_::SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::SimplifiedGemmTraitsHelper< GemmTileTraitsHelperA_, GemmTileTraitsHelperB_, Index_ >::SharedStoreIteratorB
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html new file mode 100644 index 0000000000..fd4bda0284 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd-members.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    AccumulatorsPerThread typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    AccumulatorsPerWarp typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    FragmentA typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    FragmentB typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    InstructionShape typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >inline
    ScalarA typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ScalarB typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ScalarC typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    ThreadMultiplyAdd()cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >inline
    ThreadsPerWarp typedefcutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html new file mode 100644 index 0000000000..2fcd68bdfa --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd.html @@ -0,0 +1,382 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <thread_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef ScalarA_ ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
     The fragment for A. More...
     
    typedef ScalarB_ ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
     The fragment for B. More...
     
    typedef ScalarC_ ScalarC
     The type for C and D. More...
     
    typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef Shape<1, 1, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarA_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarB_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ScalarC_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::ThreadMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ , typename ScalarA_ , typename ScalarB_ , typename ScalarC_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulatorsd 
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html new file mode 100644 index 0000000000..ddea01f4e0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_0179827d5e1abec446b31df6ae50a9c4.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    AccumulatorsPerThread typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    AccumulatorsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    FragmentA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    FragmentB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    InstructionShape typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > inline
    ScalarA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ScalarB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ScalarC typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    ThreadMultiplyAdd() cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > inline
    ThreadsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html new file mode 100644 index 0000000000..2d62b3e28f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_01half_00_01half_00_01half_01_4.html @@ -0,0 +1,383 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <hgemm_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 1, 1, 2, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef half ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
     The fragment for A. More...
     
    typedef half ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
     The fragment for B. More...
     
    typedef half ScalarC
     The type for C and D. More...
     
    typedef Fragment< half, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Make sure there's an even number of elements in both dimensions. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<half, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Shape<1, 1, 2, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef half cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::ThreadMultiplyAdd ()
    +
    +inline
    +
    +

    Ctor.

    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulators & d
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html new file mode 100644 index 0000000000..16dd10792e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_ea75a025471611dd709d5f2a07d1bc06.html @@ -0,0 +1,102 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >, including all inherited members.

    + + + + + + + + + + + + + +
    Accumulators typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    AccumulatorsPerThread typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    AccumulatorsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    FragmentA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    FragmentB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    InstructionShape typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > inline
    ScalarA typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ScalarB typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ScalarC typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    ThreadMultiplyAdd() cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half > inline
    ThreadsPerWarp typedef cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, half, half, half >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html new file mode 100644 index 0000000000..d358dd3b3a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1ThreadMultiplyAdd_3_01AccumulatorsPerThread___00_01ThreadsPerWarp___00_f5353db950bbf0023472029cac4814b6.html @@ -0,0 +1,382 @@ + + + + + + + +Cutlass: cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int > Struct Template Reference
    +
    +
    + +

    Template performing matrix multiply-add operation within a thread. +

    + +

    #include <igemm_multiply_add.h>

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef Shape< 4, 1, 1 > InstructionShape
     The shape of the instruction. More...
     
    typedef AccumulatorsPerThread_ AccumulatorsPerThread
     The number of accumulators per thread. More...
     
    typedef ThreadsPerWarp_ ThreadsPerWarp
     The number of threads per warp. More...
     
    typedef ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
     The number of accumulators per warp. More...
     
    typedef int8_t ScalarA
     The type for A. More...
     
    typedef Fragment< ScalarA, AccumulatorsPerThread::kW *4 > FragmentA
     The fragment for A. More...
     
    typedef int8_t ScalarB
     The type for B. More...
     
    typedef Fragment< ScalarB, AccumulatorsPerThread::kH *4 > FragmentB
     The fragment for B. More...
     
    typedef int ScalarC
     The type for C and D. More...
     
    typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW > Accumulators
     The accumulators. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE ThreadMultiplyAdd ()
     Ctor. More...
     
    CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
     Multiply : d = a*b + c. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Accumulators

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::Accumulators
    +
    + +
    +
    + +

    ◆ AccumulatorsPerThread

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef AccumulatorsPerThread_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerThread
    +
    + +
    +
    + +

    ◆ AccumulatorsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ShapeMul<AccumulatorsPerThread, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::AccumulatorsPerWarp
    +
    + +
    +
    + +

    ◆ FragmentA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarA, AccumulatorsPerThread::kW * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentA
    +
    + +
    +
    + +

    ◆ FragmentB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Fragment<ScalarB, AccumulatorsPerThread::kH * 4> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::FragmentB
    +
    + +
    +
    + +

    ◆ InstructionShape

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef Shape<4, 1, 1> cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::InstructionShape
    +
    + +
    +
    + +

    ◆ ScalarA

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarA
    +
    + +
    +
    + +

    ◆ ScalarB

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int8_t cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarB
    +
    + +
    +
    + +

    ◆ ScalarC

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef int cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ScalarC
    +
    + +
    +
    + +

    ◆ ThreadsPerWarp

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + +
    typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadsPerWarp
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ ThreadMultiplyAdd()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::ThreadMultiplyAdd ()
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ multiply_add()

    + +
    +
    +
    +template<typename AccumulatorsPerThread_ , typename ThreadsPerWarp_ >
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, int8_t, int8_t, int >::multiply_add (FragmentA const & a,
    FragmentB const & b,
    Accumulators const & c,
    Accumulators & d
    )
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html new file mode 100644 index 0000000000..633e1cdc73 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd-members.html @@ -0,0 +1,131 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >, including all inherited members.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AccessType typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Base typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    data() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    data() const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    Delta typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Fragment typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentConstIterator typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentElement typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentIterator typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    FragmentShape typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    ImmediateOffsetStrides typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    inc_advance() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    inc_c() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    inc_d() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    inc_h() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    inc_w() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    Index typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0)) cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > inline static
    Iterations typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    kAccessSize cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > static
    kAdvance cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > static
    kFragmentSize cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > static
    kIteratorFragment cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > static
    kLayout cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > static
    kMemorySpace cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > static
    params cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Pointer typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    predicates cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    PredicateVector typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Scalar typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Skew typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Storage typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    This_ typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    thread_offset cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    ThreadOffset typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Threads typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    Tile typedef cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    Traits typedef cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
    valid(int d, int h, int w, int c) const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    WmmaGemmGlobalIteratorCd() cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset()) cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html new file mode 100644 index 0000000000..e9c160775e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.html @@ -0,0 +1,820 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > Struct Template Reference
    +
    +
    + +

    #include <wmma_gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >:
    +
    +
    + + +cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > + +
    + + + + + +

    +Classes

    struct  Params
     The params. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > This_
     This class. More...
     
    typedef TileTraits_ Traits
     The traits. More...
     
    typedef TileIteratorBase< Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > ImmediateOffsetStrides
     Override the strides in each dimension between different loads/stores. More...
     
    typedef TileTraits_::Scalar Scalar
     The scalar. More...
     
    typedef TileTraits_::Pointer Pointer
     The pointer. More...
     
    typedef TileTraits_::Threads Threads
     The threads. More...
     
    typedef Index_ Index
     The index. More...
     
    typedef TileTraits_::ThreadOffset ThreadOffset
     The thread offset functor. More...
     
    - Public Types inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    typedef TileTraits_ Traits
     concept TileTraits More...
     
    typedef TileTraits_::Scalar Scalar
     Scalar element. More...
     
    typedef TileTraits_::Scalar FragmentElement
     Fragment element. More...
     
    typedef Index_ Index
     Index type. More...
     
    typedef Shape< 0, 0, 0, 0 > Skew
     Skew quantity. More...
     
    typedef Traits::Tile Tile
     Tile shape. More...
     
    typedef Traits::Delta Delta
     Distance along each dimension. More...
     
    typedef Traits::ImmediateOffsetStrides ImmediateOffsetStrides
     The strides in each dimension between different loads/stores. More...
     
    typedef Traits::Iterations Iterations
     Iterations. More...
     
    typedef Traits::ThreadOffset ThreadOffset
     Thread offset. More...
     
    typedef Vectorize< FragmentElement, kAccessSize >::Type AccessType
     The elements loaded/store by one instruction. More...
     
    typedef Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
     The storage. More...
     
    typedef Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
     The fragment. More...
     
    typedef FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
     The fragment iterator. More...
     
    typedef FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
     The fragment const iterator. More...
     
    typedef FragmentIterator::FragmentShape FragmentShape
     The shape of the fragment. More...
     
    typedef PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
     Default predicate mask type. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd ()
     Ctor. More...
     
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd (Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
     Ctor. More...
     
    CUTLASS_DEVICE void inc_c ()
     Increment the pointer in the C dimension. More...
     
    CUTLASS_DEVICE void inc_w ()
     Increment the pointer in the W dimension. More...
     
    CUTLASS_DEVICE void inc_h ()
     Increment the pointer in the H dimension. More...
     
    CUTLASS_DEVICE void inc_d ()
     Increment the pointer in the D dimension. More...
     
    CUTLASS_DEVICE void inc_advance ()
     Increment the pointer to move to the next iteration. More...
     
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Test the predicate. More...
     
    CUTLASS_HOST_DEVICE Pointer data ()
     Returns the raw pointer. More...
     
    CUTLASS_HOST_DEVICE Pointer const data () const
     
    - Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    CUTLASS_DEVICE bool valid (int d, int h, int w, int c) const
     Is the iterator valid? More...
     
    + + + + + + + + +

    +Public Attributes

    Params params
     
    Coord< 4 > thread_offset
     
    cutlass::PredicateVector< Base::Iterations::kW > predicates
     The predicates for the row. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Static Public Attributes

    static MatrixLayout::Kind const kLayout = TileTraits_::kLayout
     The layout. More...
     
    - Static Public Attributes inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static IteratorAdvance::Kind const kAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    static IteratorFragment::Kind const kIteratorFragment
     Specifies iterator storage fragment type (Scalar or WmmaMatrix) More...
     
    static MemorySpace::Kind const kMemorySpace
     Source or destination memory space. More...
     
    static int const kAccessSize
     The number of scalars accessed per load/store. More...
     
    static int const kFragmentSize
     The size of storage needed per fragment. More...
     
    + + + + + +

    +Additional Inherited Members

    - Static Public Member Functions inherited from cutlass::TileIteratorBase< TileTraits_, TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ >
    static CUTLASS_DEVICE void initialize_predicates (PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
     Initializes a predicate vector. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileIteratorBase<Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Base
    +
    + +
    +
    + +

    ◆ ImmediateOffsetStrides

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ImmediateOffsetStrides
    +
    + +
    +
    + +

    ◆ Index

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef Index_ cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Index
    +
    + +
    +
    + +

    ◆ Pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Pointer
    +
    + +
    +
    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Scalar cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Scalar
    +
    + +
    +
    + +

    ◆ This_

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef WmmaGemmGlobalIteratorCd<TileTraits_, Index_> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::This_
    +
    + +
    +
    + +

    ◆ ThreadOffset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::ThreadOffset cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::ThreadOffset
    +
    + +
    +
    + +

    ◆ Threads

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_::Threads cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Threads
    +
    + +
    +
    + +

    ◆ Traits

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    typedef TileTraits_ cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Traits
    +
    + +
    +
    +

    Constructor & Destructor Documentation

    + +

    ◆ WmmaGemmGlobalIteratorCd() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::WmmaGemmGlobalIteratorCd ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ WmmaGemmGlobalIteratorCd() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::WmmaGemmGlobalIteratorCd (Params const & params,
    const Coord< 3 > & bounds,
    const Coord< 3 > & block,
    int const pointer_offset = 0,
    int const pred_offset = 0,
    ThreadOffset thread_offset_func = ThreadOffset() 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ data() [1/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::data ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ data() [2/2]

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Pointer const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::data () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_advance()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_advance ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_c()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_c ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_d()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_d ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_h()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_h ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ inc_w()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + +
    CUTLASS_DEVICE void cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::inc_w ()
    +
    +inline
    +
    + +
    +
    + +

    ◆ valid()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_DEVICE bool cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::valid (int d,
    int h,
    int w,
    int c 
    ) const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ kLayout

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + +
    MatrixLayout::Kind const cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::kLayout = TileTraits_::kLayout
    +
    +static
    +
    + +
    +
    + +

    ◆ params

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Params cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::params
    +
    + +
    +
    + +

    ◆ predicates

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    cutlass::PredicateVector<Base::Iterations::kW> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::predicates
    +
    + +
    +
    + +

    ◆ thread_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Coord<4> cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::thread_offset
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd.png new file mode 100644 index 0000000000000000000000000000000000000000..f5ad5c7aa3ca2fe9955ba9874e2f19effb9c234a GIT binary patch literal 1763 zcmd6oc{tPw7{`BR+Cgj5HZp5+ZLylIjFlsio1EjGAsN$(aSgHK%ss5g)i}0TBu8>3 zGA2U?;VC3B&M}BBWE^Q+lV)ezKig;j-uHRl&-eR%p7)RUpZ9rFY|$5?2N4GW0DzjC z8QTK@h%E57B47cXk}@R(^1ZE~Vnt_2mcV^!27}@JC>$e*A|!jX10Wds zyO=K=RRI7JY;J7e7%s#qe$W)NZe4Dsau-_YNYIADE7nQ%Gimy}F^Q zug9!OMlvnhcL2@c<1prB9*6kH;}B*3=xPgIf`rPs58BIy(0RJG9m7N1;3?N|k}FPi zNSqr2Wk2jbrcQ%iD5)WQXh-eDL2ix*A}>|-F*aUycNBsyJ2b(eep!B@Xwfr zlc>H_k7)ulU!-wuPY#x&#bQ&gORpv!lTrTF!aI80yl__G`6d?TLhWLWZ*yaJlS`cnCdZd%HD6d!3Lvq4FjJy+490 zm`cu2rxCYk@IYh`f5G&XjfqE;8|ItIy=$NkqjFns;8s^J{MqwMn2XQ)6%b2eI6s{m zER6}U-l-D`eQ9VBO!CbS;xXK9oV}Lx$!J5$N+VR(w_bVwsVwQ+iYK4jFbmk0edf`Y zccsXJuJiknFpZJ9nDZg)#-LAl*Q4ou6>J%wdIy&EYwcaVwY9rJa$_B#lLj&*VwCMf z@nZLVweiZfb6Npq!X1U2?o2DO%=hsS8wJL~WY}WNDL-S}3=?-lTrFI_8vW>FHzx;T z>7p4CDB-O}+xo<#hwNhftt)D5!gO{AE=3Wmc~C}XUeRKDGr~7|B%MU(Po!3p9^PI* z)4bqSP~_3fMl0PF=`GB7wCWaVuzSkUr@nATW4%o~zrO%D78}smR_=7tlp0}jk^I&{ z?W8$hq-)yZ_0(=nGgf-c)0M*Rvh4Y+2xXsI!MQ3dMTEjABnm2b)^%Q&;FrUSZ8&6M zFaP`kbIyjYO<53u3X*x_e+kX!noFLdbDpPYo~MhhxIVCsjR8P$03K9gAm9-_y&nt( zdgz~|Df*j;5GW3r2oW$M5hS4Cj{bMV9WT#G$gw|YZ{Y({hy&z-opWFIm;9Po=IoiO zF)**G9{fsaS{Lx3N_plLxA#mo52Yajb91@#RvZt)+2Q^JG@X(TUZ2BRc8S6JS|)*D zH|XzucsAAmsnL9QG@(9yI_AZ;?nLWgcNg}%>W-H=6*Q=SZq-$#2$N@v(>y z-i}tq5UsQ8X3kwH8p0EU#&-+)9|zP`bl&t^58YBsa_V-bXFu7t@l|;kjj!%sl&*De zNTzd_k`Xl|ygKbp1LC7(a@#oEzd)a%i?y3;Qg_a%{VD0`tsg6kT3A_&I!&EDFdjlW zkLF zxhG4-Q*>XFUu6VXKf24AE>V_apJi`R b{7z3bK7Qfpi1!V_M*_@E(8hI!_=LXzRN5^3 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html new file mode 100644 index 0000000000..11f86143e5 --- /dev/null +++ 
b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits-members.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >, including all inherited members.

    + + + + + + + + + + + + + + + +
    Base typedefcutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
    Delta typedefcutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
    ImmediateOffsetStrides typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Iterations typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    kAccessSizecutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kLayoutcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kMemorySpacecutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    kOperandcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >static
    MultiplicandTraits typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Pointer typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Scalar typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Threads typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    ThreadsDelta typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    Tile typedefcutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html new file mode 100644 index 0000000000..d327b3c16a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.html @@ -0,0 +1,197 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ > Struct Template Reference
    +
    +
    + +

    #include <wmma_gemm_global_tile.h>

    +
    +Inheritance diagram for cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >:
    +
    +
    + + +cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > + +
    + + + + + +

    +Classes

    struct  ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Public Types

    typedef GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
     The base class. More...
     
    typedef Shape< 0, 0, Base::Delta::kW, Base::Delta::kCDelta
     Override the strides in each dimension between different loads/stores. More...
     
    - Public Types inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    typedef platform::remove_const< Scalar_ >::type Scalar
     The scalar. More...
     
    typedef Scalar_ * Pointer
     The pointer. More...
     
    typedef ReshapeTile< Tile_, kAccessSize_ >::Tile Tile
     The tile shape. More...
     
    typedef ReshapeThreads< Tile, Threads_ >::Threads Threads
     The threads shape. More...
     
    typedef Shape< 1, 1, Tile::kC > ThreadsDelta
     The relative offset between two elements in the H/W dimension in adjacent threads. More...
     
    typedef Shape< 0, Threads::kH, Threads::kW *kAccessSizeDelta
     The strides in each dimension between different loads/stores. More...
     
    typedef Shape< 0, 0, Threads::kW *ThreadsDelta::kW, kAccessSizeImmediateOffsetStrides
     Strides for immediate offset computation. More...
     
    typedef Shape< 1, Tile::kH/Threads::kH, Tile::kW/Threads::kW, Tile::kC/kAccessSizeIterations
     The number of iterations needed to load/store the tile. More...
     
    typedef GemmMultiplicandTraits< Tile, kOperand, kLayoutMultiplicandTraits
     
    + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Static Public Attributes inherited from cutlass::gemm::GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ >
    static GemmOperand::Kind const kOperand
     Identity of the operand. More...
     
    static MatrixLayout::Kind const kLayout
     The layout. More...
     
    static int const kAccessSize
     The number of scalars per LDG/STG. More...
     
    static MemorySpace::Kind const kMemorySpace
     The memory space. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Base

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef GemmGlobalTileTraits<GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::Base
    +
    + +
    +
    + +

    ◆ Delta

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + +
    typedef Shape<0, 0, Base::Delta::kW, Base::Delta::kC> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::Delta
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits.png new file mode 100644 index 0000000000000000000000000000000000000000..4c15d9dd07f2b3160b450324ed44eec3a3f14059 GIT binary patch literal 2187 zcmc&$eK^x=A0N_OBt>~!8Cl6G=2TvCjJzZ>iIKM=AqundGQ*sq7n4S3ty-H{r{(QdIecgY2KcDL^3h+Oqt8Jct04)t*t=m7^3~a^$e!&MsG#2+igYO;9!BagQnlXdaB}_y z#nqLE7~zf4RtyRwY*l49BhkJTH02-ubuJ1(g8}+*q%|_a6UlHTL@~ z*zXHrxu_}e5z6$a&F$-L@XYgcGWio8XCvaf`@BtS*|~!$`S$&q&Uo45TTOPEaHRk9 z+;X0S^Lk{8F$8)0w5!S>+;lGt8KgQYlA`WBNDJ#Jf|gC<2b$|7KQ4HoF%vJ{WI}t| z^6UWNY?=q*V-&^PRH~d|^`G-e6?^!W>#xqeBFrn+$>yux>qx{xOY8!(wxy%V(e-%l zH%!dOk`~kX@%sD@!$U`%U9qJSmQoLiJHB4D!e1(3&~xeab~`C+pCD>^qdb#V`{Z7w zfpPnvA@UQ#H?h}0jNh#x4=wa;c+JTUyBGgjMl<_%nzX0ccaL~@eiG4p#b?xO{+Lty z=j#euSAXfYmXqTXZPRQ;kTWx_Jy~-5Z1QE5%~^MRlOZk1J)QmBP{r00cylw2UhSUV z^KOc8=O4Dig$o0^;mXGX(62!V+b}&6)T&}s(Flez3;JgZ-tGG>1;V`F7k2b zpF1*^S#hM|kqmbQJY>b?Ipcf&Ax~#*sBXA4pk(hmQ@8fX*)?)Jq4I8lTk1Q536DoyNi(KWKRk#TJxz0Q+bVt}V5Hi(tYaz@r zTeQp9)}p!_EORAA@Vk7TCOVZQ5^=tSG-GM-Zt%f}mYohvx&an(p-NxDz$f&w}1Cfx7aFB0$V@e<~zeklbOh>#R;~zq6pIa_Rl5aI|9M z+VUaK$lFGK#TXqaBBmfLS7OM}sdI#cr5Gojw_!N@BNUU`H6bEwM>KJflpuAVr} zuwby9^YZY#+=5nyhp@&d-B*N7%=J!}M5ZON@v*iHdB4J>$Kh6LWg&5aS(c1qz;p*l zH*5Mn#RVdljN~0Qxv>iquTLFUp~fHhWrwf8X6o7A%d9EnTQ>$4GlR*OIzbAXjUm9@Uj=Mo~frjf!s8PyyS{K+*rz#nk^; z{0&$pY{mQu*k90!5n_!FXn!b_gUvOssw?EaA~iG!^{KDi%gkK!uL24_Zxcs<3FP_* z*hg)51?&fTnUV>Rravo_b`}9bL*-B5($DJuyAV~0_C;Pt`_@y@zjy2eSp$!ShwAHH zRS)w-x*X7s1~LsG?WtUi8qnYyM$zh*6%{CD*A{hS24PvPr{&nbUD#Dp2FrUL{heCuyP+VfoXCXNb2PJpwi%)ySgU@-1*ae1^=+@?m-`ES5xY4P zLwyD(M)G6UUhKA$O6#WB7oNl#WV-bZrWOhbdYz2ZYZj-^{C3uJInKU#WCcU5+kf?j z;!HckW#MmSoL)S=SfbtI;*J55Cn{Al%9Di(&xuY;&qj!zxV)Mj{$-HO6dtuu*+%1&KFvy7iwy)ScwqAr?KtuIr2W zI5-eKr&yko)4k;4o@Zd0O<}ViS38tbvN24vljPE;%j1^b#^rr9;PO5jn(q}F2VY{? 
zn^nB~t1I(v`dpPE)z&#ds=Gx73ginE)rRuCi+htoHmwyq%Ni@nj(yLYp42?JEwUS7 z!9Is=>YrhkwVrB|xbZ`1 Vq*;@%rohhuggof)^}r*#;NSV>D7*jw literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html new file mode 100644 index 0000000000..383587f8e5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html new file mode 100644 index 0000000000..0b836280fb --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCdTraits_1_1ThreadOffset.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset Struct Reference
    +
    +
    + +

    Computes the thread offset in (H, W) based on thread ID. +

    + +

    #include <wmma_gemm_global_tile.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE Coord< 4 > operator() () const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename Scalar_ , typename Tile_ , typename Threads_ , int kAccessSize_>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE Coord<4> cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset::operator() () const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html new file mode 100644 index 0000000000..9f72d0038e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html new file mode 100644 index 0000000000..32bf2ce884 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1Params.html @@ -0,0 +1,298 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params Struct Reference
    +
    +
    + +

    The params. +

    + +

    #include <wmma_gemm_global_tile.h>

    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE int initialize (Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w)
     Setup the params. More...
     
    + + + + + + + + + + + + + + + + + + + + +

    +Public Attributes

    Pointer pointer
     The pointer. More...
     
    Index stride_h
     The stride in the H dimension to setup the thread in the block. More...
     
    Index inc_h
     The strides to increment the pointer. More...
     
    Index inc_advance
     
    Index predicate_offset
     The column offset to compute the predicate for the columns. More...
     
    Index predicate_inc_h
     The strides to increment the predicate offset. More...
     
    Index predicate_inc_advance
     
    +

    Member Function Documentation

    + +

    ◆ initialize()

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE int cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::initialize (Pointer pointer,
    Index ld,
    Index n,
    Index epilogue_stride_w,
    Index epilogue_delta_w 
    )
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_advance
    +
    + +
    +
    + +

    ◆ inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::inc_h
    +
    + +
    +
    + +

    ◆ pointer

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Pointer cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::pointer
    +
    + +
    +
    + +

    ◆ predicate_inc_advance

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_advance
    +
    + +
    +
    + +

    ◆ predicate_inc_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_inc_h
    +
    + +
    +
    + +

    ◆ predicate_offset

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::predicate_offset
    +
    + +
    +
    + +

    ◆ stride_h

    + +
    +
    +
    +template<typename TileTraits_ , typename Index_ = int>
    + + + + +
    Index cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params::stride_h
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html new file mode 100644 index 0000000000..78f13205d9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1gemm_1_1WmmaGemmGlobalIteratorCd_1_1SharedStorage.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::SharedStorage Struct Reference
    +
    +
    + +

    The shared memory storage needed by the iterator. +

    + +

    #include <wmma_gemm_global_tile.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2-members.html b/docs/generated-html/structcutlass_1_1is__pow2-members.html new file mode 100644 index 0000000000..8ec0131585 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1is__pow2-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::is_pow2< N > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2.html b/docs/generated-html/structcutlass_1_1is__pow2.html new file mode 100644 index 0000000000..01c0ea1679 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1is__pow2.html @@ -0,0 +1,125 @@ + + + + + + + +Cutlass: cutlass::is_pow2< N > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::is_pow2< N > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    +
    +Inheritance diagram for cutlass::is_pow2< N >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(N &(N - 1))==0 > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(N &(N - 1))==0 >
    static const bool value
     
    +

    Detailed Description

    +

    template<int N>
    +struct cutlass::is_pow2< N >

    + +

    Statically determine if N is a power-of-two

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1is__pow2.png b/docs/generated-html/structcutlass_1_1is__pow2.png new file mode 100644 index 0000000000000000000000000000000000000000..00d3a504c3fe21e6a2698a862a38a8b6598b78e8 GIT binary patch literal 1061 zcmeAS@N?(olHy`uVBq!ia0y~yV2lN_12~w0Wbgj4_drS_z$e7@|Ns9$=7+B@mK`dc z0AzvjfddC3HdcfIxf~@ye!&btMIdnXREQA+1M?P77srqa#Uq?WvR<&*PJbjS|8{rqks6v^f|?RMdxlZZc<^L^rh_3I+aSnGS!pa8?-&+T4Fsv zB^}SJ%9&K+^18_0tG&FZRyFff=W;)*ncSBhuXV4KD-+ahzpZ9{sA)-}_vP*N^X~@7 z)ivC$UeW*9uY7XWtZo0ERh2I>-~II6p}#wpBuj3*tzW#SY@htyvajD%TR!Fgy}s}7 zfv=5!Kc9Vl<+W}{UQ6DIhq1q-&z?}75W9Q#&B*n2=ks#&?I%y(c6PUk*LRhLs9`+oMRgk5{G%6l>+_n&{uC%RI; zXa%qOn{{5hH+u$)eh7M3wvO-ZwrZbW=QbI58p%9cw`9xHnRhF{?Vq>%*gDR`s^$FM zuNJ(DylWdPxy%1$;oZ3{n=~b!zkYY2IzlLI>b{ll`sROpn!Imbso<8WH+Eh5_gbNP z{^8PZzuLD(x39Tb^Ko{Y)uF2G7dMJ@-Q*~<)qDF{%`?|}!)_a|p#7%1CzVY5d0F*k z>OG4oDze7rpM!M#+a_GeoqlVs{-#grp6}CQ!zY|J7)Xcvy?GPwq z{KK)G0T}fR`iJxy#DPk>H*i02y}|lH=?znbU>V~cj@^_>*UY**C5S7?>m;L)1H*?h zG?^hoa$bY*j?Do + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_down< N, CurrentVal, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_down< N, CurrentVal, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_down< N, CurrentVal, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down.html b/docs/generated-html/structcutlass_1_1log2__down.html new file mode 100644 index 0000000000..bee9f1c87f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: cutlass::log2_down< N, CurrentVal, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_down< N, CurrentVal, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + + +

    +Public Types

    enum  { value = log2_down<N, (CurrentVal >> 1), Count + 1>::value + }
     Static logarithm value. More...
     
    +

    Detailed Description

    +

    template<int N, int CurrentVal = N, int Count = 0>
    +struct cutlass::log2_down< N, CurrentVal, Count >

    + +

    Statically determine log2(N), rounded down

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int CurrentVal = N, int Count = 0>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html new file mode 100644 index 0000000000..9e97ad5958 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_down< N, 1, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_down< N, 1, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_down< N, 1, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html new file mode 100644 index 0000000000..115f5dec0a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__down_3_01N_00_011_00_01Count_01_4.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: cutlass::log2_down< N, 1, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_down< N, 1, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = Count + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int Count>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up-members.html b/docs/generated-html/structcutlass_1_1log2__up-members.html new file mode 100644 index 0000000000..b402e19a24 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_up< N, CurrentVal, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_up< N, CurrentVal, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_up< N, CurrentVal, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up.html b/docs/generated-html/structcutlass_1_1log2__up.html new file mode 100644 index 0000000000..76434f767a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up.html @@ -0,0 +1,128 @@ + + + + + + + +Cutlass: cutlass::log2_up< N, CurrentVal, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_up< N, CurrentVal, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + + +

    +Public Types

    enum  { value = log2_up<N, (CurrentVal >> 1), Count + 1>::value + }
     Static logarithm value. More...
     
    +

    Detailed Description

    +

    template<int N, int CurrentVal = N, int Count = 0>
    +struct cutlass::log2_up< N, CurrentVal, Count >

    + +

    Statically determine log2(N), rounded up

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int CurrentVal = N, int Count = 0>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html new file mode 100644 index 0000000000..43e9b591a8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::log2_up< N, 1, Count > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::log2_up< N, 1, Count >, including all inherited members.

    + + +
    value enum valuecutlass::log2_up< N, 1, Count >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html new file mode 100644 index 0000000000..7fffdf1b7f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1log2__up_3_01N_00_011_00_01Count_01_4.html @@ -0,0 +1,122 @@ + + + + + + + +Cutlass: cutlass::log2_up< N, 1, Count > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::log2_up< N, 1, Count > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = ((1 << Count) < N) ? Count + 1 : Count + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N, int Count>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html new file mode 100644 index 0000000000..3de8d20ba9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__chunk.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: cutlass::platform::aligned_chunk< Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::aligned_chunk< Align > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html new file mode 100644 index 0000000000..fd6fe12ec7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::aligned_storage< Len, Align > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::aligned_storage< Len, Align >, including all inherited members.

    + + +
    type typedefcutlass::platform::aligned_storage< Len, Align >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html new file mode 100644 index 0000000000..aff24062b1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1aligned__storage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::aligned_storage< Len, Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::aligned_storage< Len, Align > Struct Template Reference
    +
    +
    + +

    std::aligned_storage +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<size_t Len, size_t Align>
    + + + + +
    typedef aligned_chunk<Align> cutlass::platform::aligned_storage< Len, Align >::type[Len/sizeof(aligned_chunk< Align >)]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html new file mode 100644 index 0000000000..fc7b447c38 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html new file mode 100644 index 0000000000..694be0bd36 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< value_t > Struct Template Reference
    +
    +
    + +

    std::alignment_of +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< const value_t > +cutlass::platform::alignment_of< const volatile value_t > +cutlass::platform::alignment_of< volatile value_t > + +
    + + + + +

    +Classes

    struct  pad
     
    + + + +

    +Public Types

    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of.png new file mode 100644 index 0000000000000000000000000000000000000000..ed715083dd075a7ca09786dbc5f2a7302ebc2a5b GIT binary patch literal 1846 zcmd5-X;4#F6i$LE8W6OiNF}H>>_Rk(LW3X*;sZ?B2C;~U3#d?{ktJ9{Xlq3UK^`bo zArT{sT0s_#Xb91iC0am43zGYrd*E@6X`ObOYckj7#&zX15 zpW?j`_Z1F>LM;S#dIq9UdU6E&&(}lNa;dTuG0Xk^g1vM)9RlI0!IGy57ZLPv3WdVZ zN=69c&W{hI1fdYp+<5&gn1n)o(ExaQ1T)a1J;U_dTNiotX79SB)HOFTuusmC#2y`u zhgToH8tL+VKhfmH>GI)6H-;aLC0tL#Z1QTMgnHSY#ys%;4A692Qd`e>GSvxIaO1@I zd?&#K3X=88bRmqgwC!lAf~O}KNu_2U3w%F&f9Y`>ITaom88P~x$f-~$N>T<%GpPIq2olOw#>KXh=VV$E?~7Sz_( z6FVGfg|+Gw@v6usLk^eR8S^5_-(}1VFFjs#WrgZE;h-^bULD2=8=Gy4al^Gt{WgB2 zOKEkYBMnNdwdGe|^`o?P$31k(J~L}0CwlTfGMAy1e^&UCO4=V?UOQiN58fvvI;k;k zt%H$Lj#2Ncs76ul;)=O@aXo|;VZAL@snv#S7ijh>YVk8sB5j<$J2oY5UA4tA{dA05 znkgo=$1?gQ*=IOKyaBzE_Bx5~z61`SWaq3*FuUu!-I&_+c<$JoQr?||m_Ip91?C6n z(}|R37hr%`3I@#YY zIA@c4TOOFh)3eKWj3o6{n7X#z&ajvJ3qvH6MgmeYY!ECgN*fKRUJ??3`%KWD5WYJ; zytB^8#D6O~L*jwPn!qk|+hE)>ddj~vLfnAAU8cak7a?&xk>|0IE6Nz#(b$yb$b(-` zGv*?9G=U>OF*A`w{e|fP=yVKDv(8SAh*s|SC=$&T%qP+QH6w|S!z-nui4Ac)>37JE z2|i!f+=$&~|IF&wn}j)ocu#%)7eSdY3R2HLkW4Wl+SMwFnhBl%xZ;?g381nQ*ER8)w*hD>cq1imRDk?da%F`&z1vd!3l_by2R_xu*f_$RI1-f03y;GoXJT_ZHt<$B zbWLQHXsx&fYPECU1X>qLOKK#$1Dt&RIytROJ|XyW;hTFfTkPAi zC6_yJt+1>K^p!*|U}ve~OV8DNFOFCaQ^ymd`8-U-rI}+w-d%Ol-1$iR&WdRv&sRXc zeyvXIFgt65@mz9y%Y9yzbsg(~zU~OEI!qn|Mi^=ur?gp*31-!+12WJ+@MTA?`jpr}PaAcL)f5h9CNF|#8wQF88QCHS zN2s-@jqA*Exm56uxU1h{D3qZkk$GA99IA2W-Elq0D0$LgpWB&?ndyF!br_$7o@F^QQhyYv8Qp%O@9DtJ8IDJ-3 zor4~mzykk3Dpu?GAItgQeNbu4fYi)(HB(3W$rkCKh)E_?? + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< value_t >::pad Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html new file mode 100644 index 0000000000..55d1bd6e89 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_1_1pad.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< value_t >::pad Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< value_t >::pad Struct Reference
    +
    +
    + +

    #include <platform.h>

    + + + + + + +

    +Public Attributes

    value_t val
     
    char byte
     
    +

    Member Data Documentation

    + +

    ◆ byte

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    char cutlass::platform::alignment_of< value_t >::pad::byte
    +
    + +
    +
    + +

    ◆ val

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    value_t cutlass::platform::alignment_of< value_t >::pad::val
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html new file mode 100644 index 0000000000..ea64f250a6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< const value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< const value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html new file mode 100644 index 0000000000..8e98bc5309 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< const value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< const value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< const value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..2be14abae5fe46ef33b748613437dde753f3005d GIT binary patch literal 1078 zcmeAS@N?(olHy`uVBq!ia0y~yU=#yDm)<0isWB>= zSyR1l&Q^(iX|nVCsY5-k$LD@GD>5zpmYca#zbNwdJFO`bUw*xlU#R&z)28ITeNe*p ztoqkc%RlZd|Go0^>de2}9Zmn6ZvPf1YpugS`_<+1Tba`*v7JvBNsIK{b!kp_`Q2Gv zZn-&0Ow~DmZ+tv4y)ap4?{|sWw(|o|mxC8r{$Dddt)sQ>8!m;S+&F z9}+C5Z+$FqEJp9-o)b&&_7%D;KglGXZSnY0PFl>LYjV?VTMA-cMZ_;liF;hs^gbwU z$}ZO+_1!y9vW8B~Hk5w;GiWEz`->k6E6n5dy4!C*+5GF%oNoV~uhk--wtfEe@>kNm zc~M-iBTmN!f8G84#fsU_Y$tW)w=b;kUy?MdTKMk2+dEV)XGzJ=%e|o;x#@`Oq_C}< zrT{}o*iPl8@-NkxW6SkD{i+#bt@1rzeYy1H{v`eU_hFOvE#NPlxa7arN!Lk>ki)q2 zF(mxYnl{{DB;O#?5M0E_@$09j0mI%&+zuCby_*l>bB^~T?8^}d_$miu$Be#wDzD1Xku*Pcse1eN zExcP|OBBU7joZ#r;P>f8>K*^=VZKRW9@=iUDM^|bfU! + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< const volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< const volatile value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html new file mode 100644 index 0000000000..4f8edc6ee4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< const volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< const volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< const volatile value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..94c91b652046ec740614e978475d62cb1ceb8aaf GIT binary patch literal 1141 zcmeAS@N?(olHy`uVBq!ia0y~yU~~bp12~w0WEjhzvp`BBz$e7@|Ns9$=7+B@mK`dc z0AzvjfddC3HdcfIxf~@ye!&btMIdnXREQA+0}G3%i(^Oy~h7o^$Q^KgMj(&&f=#+Vf3YVcO3* z^1JNqCuQ>;{CDlZtdD)t>L2fPO|;YY zT>m+G%MTW^SwKQ>lKkj)G^&cOLsCs|<#+$@Gr)?8|`0Qc$EaV~{F}uR%ro^t; zT*Ys3R)^;b-}d3Rd;adQMT_APZi)NMg?pDg{xJI_XUDYy{zqAx*)JAXT$$s`;hlT) z+7WTXeXL2=m-Hmv%~yE5O7VPig;R9r#&?hBlbUup&VfdQ*n#P<*%ep|VmSpBmd&5PghL?6iKXM^F#}B|mni~_hrC)C zfYH*hKuF0U0H|a|3x`066AOoyA`{D00mde;4oao}G7ETXs)l-LUUy~mV2lYxGyJW? z9^ZKlw}Gi)yBcf3nqqKzc*!g;y{lrr>!$BT2LJ6ko}2w}x|i>xoXy_;Pg>w~%{l(N z^W{&P?%%rq^s1{r{+0Y*d3tr`-_7wcf3L5ZJO9<4q@6cH=GU#AyFu^nD)$+yS9V`I zdRr-SZ(h&emfva@j&WUFpZzi}!QhzMv(@~5-Yb;jkM(Z+a_{fEX&%>;zblGwNG=Z- zxnq&M$Mcch@;lb=53=R+FSvbG?9q-lJRLy=i+^wbJEyyL&v#|LS_$Tk*}A=oyDr-_ z?TEYN_~YCY7MJWD?1fuzHotgxNgzqvB(2{1<2HXQiNDk4^!hJ*dV)`3TfwfBJ3Hmv z1D9xz6{E|H(H$V|-;K zXB+rmak<=?y>@-^YgHrL)vwQaCmYI++V(b;a9%x`c2um`*UNJ zH;XQk)c-AiL#8CX;qtCcYz2>H?HKks)H4^(2xo{Zc9oxFbKrQXz!~|}N7olHa=eyu uX#hrk#dM^^aD_KXbLkQ<&8M6GF)MI7- + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< double2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< double2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< double2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html new file mode 100644 index 0000000000..a60e78c50e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< double2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< double2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html new file mode 100644 index 0000000000..8f2714a949 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< double4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< double4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< double4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html new file mode 100644 index 0000000000..2084602e2e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01double4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< double4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< double4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html new file mode 100644 index 0000000000..3cbf902fce --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< float4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< float4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< float4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html new file mode 100644 index 0000000000..845ab556a7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01float4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< float4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< float4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html new file mode 100644 index 0000000000..8aa7582844 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< int4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< int4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< int4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html new file mode 100644 index 0000000000..1d78331a60 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01int4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< int4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< int4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html new file mode 100644 index 0000000000..b788913c9e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< long4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< long4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< long4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html new file mode 100644 index 0000000000..0a6a59b613 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01long4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< long4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< long4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html new file mode 100644 index 0000000000..ce64ecf4b6 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< longlong2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< longlong2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< longlong2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html new file mode 100644 index 0000000000..b5d0d2149b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< longlong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< longlong2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html new file mode 100644 index 0000000000..7bdc4055ee --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< longlong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< longlong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< longlong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html new file mode 100644 index 0000000000..e03232f6a5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01longlong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< longlong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< longlong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html new file mode 100644 index 0000000000..501443504a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< uint4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< uint4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< uint4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html new file mode 100644 index 0000000000..45a392e244 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01uint4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< uint4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< uint4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html new file mode 100644 index 0000000000..b18799de10 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html new file mode 100644 index 0000000000..45ca5ac52f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html new file mode 100644 index 0000000000..867a1a97c1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulonglong2 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulonglong2 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulonglong2 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html new file mode 100644 index 0000000000..e74e490998 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulonglong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulonglong2 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html new file mode 100644 index 0000000000..5720978218 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< ulonglong4 > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< ulonglong4 >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< ulonglong4 >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html new file mode 100644 index 0000000000..de2f97f02a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01ulonglong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< ulonglong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< ulonglong4 > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html new file mode 100644 index 0000000000..93d5c38756 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::alignment_of< volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::alignment_of< volatile value_t >, including all inherited members.

    + + +
    value enum valuecutlass::platform::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html new file mode 100644 index 0000000000..9ad8844d23 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: cutlass::platform::alignment_of< volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::alignment_of< volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::alignment_of< volatile value_t >:
    +
    +
    + + +cutlass::platform::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1alignment__of_3_01volatile_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..331d786ed748a8cea54379cdeea5da0124679a38 GIT binary patch literal 1103 zcmeAS@N?(olHy`uVBq!ia0y~yU{nOM12~w0Wa25~pFm0?z$e7@|Ns9$=7+B@mK`dc z0AzvjfddC3HdcfIxf~@ye!&btMIdnXREQA+1M@vk7srqa#(AD$3_Q6_=>5X#AnuksYtoOuKIvDWxl_I5Y!T<2 zn!a__26Kx)?PxJvSnX!7Sn_6WVQ%^fCPirr<&q<3IDWeB>uXxqQhWOQ^MEri*jvu^ z7jC?G;)d%b*Rq+D->z1=Ue$ii_6i4Y`OObjKa2TW+k2wl-L)+7KQTMCc6w>tzLcML zEK}aCEvwkJ@9^zs$9CLF{q33k&S>S2ZS22x`)DoO`TNR5i(jfUudUgezAaVXI(_a0 zm9<%wL7uZ-$Xcqrto`NNa;$HH%JR1-V!t=2T;6B@BfRp(@z>Lq_`jUnqVjV8aupTN ziIXO%xOzEF64B)J3|hjz5E2Cu4Ew#}88jO@CkZ$#Sijzd#UNaWiQ@~yln#bvUQP@f zC-FNhQDG6#;@iL!N-JQ%UOwDF|hMV^(yjj{;G}A=wQu_DRx3z8eya>=(PYuTAyrila|~xu3z))z4*}Q$iB} DKw1Ir literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html new file mode 100644 index 0000000000..970d8749d3 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::bool_constant< V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html new file mode 100644 index 0000000000..77a7942d0c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::bool_constant< V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::bool_constant< V > Struct Template Reference
    +
    +
    + +

    std::bool_constant +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::bool_constant< V >:
    +
    +
    + + +cutlass::platform::integral_constant< bool, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool, V >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool, V >
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png b/docs/generated-html/structcutlass_1_1platform_1_1bool__constant.png new file mode 100644 index 0000000000000000000000000000000000000000..0740f5a6e4e31655c27d0aee6a22c8feb2b82820 GIT binary patch literal 954 zcmeAS@N?(olHy`uVBq!ia0y~yVB`a`12~w0OMsL_fKQ0)|NsAi%nx5*EIU*> z0muU50|yR7Y^(?aayd$Z{DK*Pia_A%sSqOu24+uB7srqa#5jEpu1pj=}OUd#8rv9-+nAcb89^t!Cnz%JD_> zq_K?e{bZSHPM@PjJMJ9a`2Ew7J0%T^wJ({agzG)Gxb`dO{`Kq5GHDhn9$)5mFFDA! z**R$DTORj0@!K~i9}rAG{_xm^wQqCY?}*Vk`>%FSIZN=&tQWuT2~Iy%vnl8HeE(^= z=YF4=JEv^rKfA~!LYc>H=jiZ#d3o}Zn0N1n@B6AYZMmqLY4mu)lGS3zCoQ@C?O4F1 zlAPtue}iQnzWjeHt^K^NXa0uI3;vr~@5{V7>5J)aDL&6%@llT^q^R=x5Lo=zO1AIuq`j?+XHS!Umh&cD z_<_V?7a28~h1V@6zP+Y)ZALmn`!`o;jMnh?ty8%jcev~L)wxVRrTCKNg;KO8<{vWb z7fL<8Vb}IlSp0Dm+_`1l)VN$q>eSP{)$YYzS86wG3w1abvwU^zs{Q_A8jJJh>f~;` za(RiLX4J>M#ZQI!8diV1mcU@2vtR6ZasQf5#t+WZZZbyXA7lJ5T^Bi& + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::conditional< B, T, F > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::conditional< B, T, F >, including all inherited members.

    + + +
    type typedefcutlass::platform::conditional< B, T, F >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional.html new file mode 100644 index 0000000000..71ae69b88f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::conditional< B, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::conditional< B, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (true specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool B, class T, class F >
    + + + + +
    typedef T cutlass::platform::conditional< B, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html new file mode 100644 index 0000000000..309d3523c0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::conditional< false, T, F > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::conditional< false, T, F >, including all inherited members.

    + + +
    type typedefcutlass::platform::conditional< false, T, F >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html new file mode 100644 index 0000000000..4eee2e3ea2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1conditional_3_01false_00_01T_00_01F_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::conditional< false, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::conditional< false, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (false specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef F type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<class T , class F >
    + + + + +
    typedef F cutlass::platform::conditional< false, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html new file mode 100644 index 0000000000..0d47203dc7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::default_delete< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::default_delete< T >, including all inherited members.

    + + +
    operator()(T *ptr) constcutlass::platform::default_delete< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html new file mode 100644 index 0000000000..d15c650d50 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::platform::default_delete< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::default_delete< T > Struct Template Reference
    +
    +
    + +

    Default deleter. +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void cutlass::platform::default_delete< T >::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html new file mode 100644 index 0000000000..2ad2a94889 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::default_delete< T[]> Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::default_delete< T[]>, including all inherited members.

    + + +
    operator()(T *ptr) constcutlass::platform::default_delete< T[]>inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html new file mode 100644 index 0000000000..9051d14f5c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1default__delete_3_01T[]_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: cutlass::platform::default_delete< T[]> Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::default_delete< T[]> Struct Template Reference
    +
    +
    + +

    Partial specialization for deleting array types. +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void cutlass::platform::default_delete< T[]>::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html new file mode 100644 index 0000000000..cd8ca9d5e5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::enable_if< C, T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::enable_if< C, T >, including all inherited members.

    + + +
    type typedefcutlass::platform::enable_if< C, T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html new file mode 100644 index 0000000000..2621d1739a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::enable_if< C, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::enable_if< C, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (true specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool C, typename T = void>
    + + + + +
    typedef T cutlass::platform::enable_if< C, T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html new file mode 100644 index 0000000000..22e8b1c946 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1enable__if_3_01false_00_01T_01_4.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::platform::enable_if< false, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::enable_if< false, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (false specialization) +

    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html b/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html new file mode 100644 index 0000000000..4cf39f7c71 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1greater-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::greater< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::greater< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constcutlass::platform::greater< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1greater.html b/docs/generated-html/structcutlass_1_1platform_1_1greater.html new file mode 100644 index 0000000000..aadc82c7ed --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1greater.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::greater< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::greater< T > Struct Template Reference
    +
    +
    + +

    std::greater +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::greater< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html new file mode 100644 index 0000000000..40aca68b80 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::integral_constant< value_t, V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html new file mode 100644 index 0000000000..db936f3f93 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.html @@ -0,0 +1,261 @@ + + + + + + + +Cutlass: cutlass::platform::integral_constant< value_t, V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::integral_constant< value_t, V > Struct Template Reference
    +
    +
    + +

    std::integral_constant +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::integral_constant< value_t, V >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::is_integral< char > +cutlass::platform::is_integral< int > +cutlass::platform::is_integral< long > +cutlass::platform::is_integral< long long > +cutlass::platform::is_integral< short > +cutlass::platform::is_integral< signed char > +cutlass::platform::is_integral< unsigned char > +cutlass::platform::is_integral< unsigned int > +cutlass::platform::is_integral< unsigned long > +cutlass::platform::is_integral< unsigned long long > +cutlass::platform::is_integral< unsigned short > +cutlass::platform::is_pointer_helper< T > +cutlass::platform::is_pointer_helper< T * > +cutlass::platform::is_same< A, B > +cutlass::platform::is_same< A, A > +cutlass::platform::is_volatile< T > +cutlass::platform::is_volatile< volatile T > +cutlass::platform::is_pointer_helper< remove_cv< T >::type > +cutlass::platform::is_same< void, remove_cv< T >::type > + +
    + + + + + + +

    +Public Types

    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    + + + +

    +Static Public Attributes

    static const value_t value = V
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef integral_constant<value_t, V> cutlass::platform::integral_constant< value_t, V >::type
    +
    + +
    +
    + +

    ◆ value_type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef value_t cutlass::platform::integral_constant< value_t, V >::value_type
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator value_type()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::integral_constant< value_t, V >::operator value_type () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE const value_type cutlass::platform::integral_constant< value_t, V >::operator() () const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + +
    const value_t cutlass::platform::integral_constant< value_t, V >::value = V
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png b/docs/generated-html/structcutlass_1_1platform_1_1integral__constant.png new file mode 100644 index 0000000000000000000000000000000000000000..08d53740e8ec50546772a917b654cf518713db97 GIT binary patch literal 14622 zcmd^mdsI_r`n8?;S=)Nyn{lSrqGr@kydbF}U<8tB9ZRE?XjPDFS``okB8glAxwJZ! zWobo2F_XIC=zUqAI`(tLUY1U$$ zV~FJBJ^S6ye)h9-bZb!H?04ROXWF!Bvk9O6?el5Vey5x^?SInW`aO7Naq_9pz~Q~E zpY7P_a5%u})6xEO*HVsyGw09GKmWX|B0U)V^R0uQ2W_7QUW7ban|7?3Hf>!Q;cp-9 zNPlCfOC5Zs`FG#kDY*Hb#Ie+HpZnFRp||ZpL(yJC3ogEM4|B7uE!Bx_aOTd-a!rw&~ZmO{kA79Gy2S@G;pn^&aWN1!teXz+kI(G*8^PVL9FwSmF}v z(Qxe?F;aj16nPbvWwVZlOG|aL0G9L&%W-1z7n zKAH2FbDF~%c!=2p%_Ie_!#f@wN)l)=yQ{Ay$XZo&X`wv2*^?}>#=k?~LOpVZJ|?WU zUf0ZV4{sPd9_j<_5D-`&4*1aP8w8&HH=gX0_&dajHV@P0&T)lDcNk;+Lxy7hF=HNg zo?9m=T|5_~RyWe>n;K|}HT3gCixoG+O=&3H+b(Tm9bD<34uW^EE`#vCeVNO1>N~UO z-jMoIW}umaYlN>9VU9>`!a>7O`P1}7PD_$K#&f%5k@LTxA%({6H10W~dld(%)DZzc0U=b=c1BOrPyrM zja%dxd))Y@ACXIbtQphdtdIrHVbz||6pHfFq_vu?h+CR>Y2T5VauRP4mr}8@`q283oH%f6LZR(Mcl<;*9kg#^2YAP>Ymye2gi9@VOCDNE2y9|J zST+U)XUK>ZM~{!`eLY%dtJoO)(mzhxCWO(D!U^& z4zg=*(e96Y!Q~`l9ko50%&^20*^Bv2v=f?mqNJRnf+y}?>}aiwJjA@kB}GL= z74KkMtIN0sKW+Ts?gt@6Lhya8Y7afXsBh%F$jcg;2B9nWNOmSw*X^TbHj*gC_%3cH z7G4i!xn7s725+SF#7et6F(kZj$v2FBnxT`YLl`@|d-ZxCW+#M792WbJ%D!CWk z`n~8=_TV|}^=)G^ECIK_;oPxCZvyTvw`UoVXX0-$kFF#v>P*$4VHadQYec3@p1bVp zLI6;y3QM@<4W8XZ4PtkFrkFpcg`2wOXq&@F=zT<;Cf^x&L7AMS=Zx7kr?=*Bq~qlqJHY z1RMoroai6v`EImGUQ4(VXHgI%!r9hc2tRGdvivG^v!rLSq(9Qsk*X*Z-?vkEK&QcA zyJkpn6Q-dxwlyP0!(p3cNFncpk9yAgP+DA8Dn2IVc@mhB$lr)ja#HnBU%J`9-AhsY5Knq-y>d3@I9@-_zkIJ^r1TTuN>4NK(xqZ^xU;!MV zN`ldG>KXLTzvg%@t}wpvTpZ?UMDe=VW#@IXrvE-px%I=ZkduA?(UbH?+>)0UhlS7= z&>evQ3Tq*B$F|oT2YIH)L!Yv9I6 zl$S4HF%#)8AQ=xJx#~5L9BoMOSxiX1gFuXLI?J=LT#0&6)yo5Lh|XlaR)RcPBrZQpGETs=gF#8J_$##@20^*89v#toOF5;&)jN_UFu=)7wP zSXCM}2i~E~ej1!712XJ~O6v>Lh>B(vsg#Z^s-6Lgi7F(-!X;TeNnL11n` zs53*EebIo4U_VG=`9Y&AN>BRY)(^Pp_MAi=DszmzU57wa1*;gg?v^VTwa1BC@%h9m 
zcTz~LnZ}4a(NCh-wWs*2Ar5%B14u`&gfdr_&d%6R3UCVzg~aWj3Jw3`y7^$*5#kfQ zb?YPr`b?$3f+Q`SWtyZwk^&ypG22UwH^CxI}Uwo8E`( z8@==R`uqW34mEdg%M%5Lw}imlj=-YE0WcqHeY{Pn=xZeLw;9p{jrPJlW7;EK?A|hni!VsWodRx`m;sm z%X*(mJ<@CSfZB>IS{1YWp%@dzk33wXOZ%tVY<^aoLr`_mM)PWvqyUY-2TB3H89;Mj zo-5(XI!VPy*7ia#|I-k-^fCB(fu<|7wo59a*kYs{yxGpv4EPhehkJe?U*e#h=>9!m zOb7loD(krlTeGEQN6XqRnO{T38`upWdB-k$?^EiLLvO4FCjjC9!IPioLjEr=t{LkN z6cY{*-dRvRj=km>E&Nr4>WW-3%t!ILeWeI#k^pIf4?FIyfEV0&L7LHP9L%H8t4Y&C zU}R2oba>{l`$7=qj?vLy)5QpPwUtQuiN%?0>oEk;+E-!JJg(@eTr8cBG}1t9Xg^@q zSCNynWT%My$tfbcb)#pai=hE{8AO^rF`vq$fyd_rqhB>el6D2@yIZ%j^HP-ATMk-+ zE?3-G->+9bE!QhcKW@WfiZyIFLuo20qGH@hkpJd`0oKJ3>3h&fBr{hdC_S4+-3c*< zClXTmEI)lm{p{NBq^peM*j?nx$`zXCASUU_l_!r@K(kfKG0}X`3q2r%dd06I`ekBx zXcB^1V!nyLaP&oE@FDJJcT{6fIXMIhCSt_VnIsy?f}-X$zz7~V-3QI3NEIg)1gkM;H$Gbc?}V{ z^5y0|Cr$TP2p^jQ9AnsZZst7|H|TaN6dKt%G-PmgI{3y{XiG|tCcmQ4F>boF$i58e zc0eU?bpB`vD7zRo3z3+If-|20iFvS%bFzacfKK;nt>1G6c-i$Zg3+!O#}UG(!y zVpUNykfvv!ZFzgQ%6CkY#V-+g`SVbNztEl5$GT3rtPQKW>j$p7^a%YS~%#$#_*Zl&2M+K`dSb;R6 z5vg|8RBiAEK=+lJ*Jy)9WG~FjsNg8EKPhpzWz`JpKMV*03|!8AB=cgasFX7xf$h`JG;psr%RL7Fv%nx}_jHIBZQ zy4K&N|H~wH%Ndt6C@t0ez0KTHGN7NO>h%+uQc61$Qts%+j!XOeS#?-5#h9Y3Y}a!* z+fOzGx0+}<9T-+I8vb_jGYfnl%1L9LSGDKpg59aH4%3rC34@t2pK2#B74Gz|ta%Cb zc>&5r9#O>tAIsyD^K6&X+4L|3?4b9BFQHP7qYaj^yM9sI+5U(kW_d$+*WQ5;wtvk~ zGr{<0O>sV2BN)e zp{Q&QR2sldtli6=oEPAsjjiT+o`ggpu(2}ocFJ8*ywN|JR+k1ayC&|-D_a*eSU+?! 
z`c1&-g@%Co?X7C0YkO;7Rd!TJZ6KnLyXq`M$2g_+9Fy?yOGlO75v(--CD?W3z9eJQ zl`BeGg>RDPZJ(3rtTbk98ma7l7wkBgCP~UGD)W5MxyZ=ja#q)yPPaGGg1c=-N|Uku zV#aDR@ROZxmO*RH$d*_OR13N0n*)_vo=sJ1w%7)o&75m(*&hq1k3Eyj^FnIJ`+@OCd#^(@ zZ3?1ogKem!(HJ&8bdrw3m&xK%`+g|fliAosYxcgvrQl<*}5E4 zOqA(S(%o$hayETsuMpIi#>P_y!ZE5~%f4pwVZ=Gt$Y7NX?r!nhSw9=mD)GUhexOxi zfgN_COr+7L*FJEk*gu)l{|VZgV7o7AiYs=c264q6*}7m}7o#dH4RW+}!F9hfFbGXD z%jL`EdrI4Hip?5pMl+xTla@;&-{DAyU?KzY$xfz?;RW3WvtWZ~FF8A*ulh+$@cp6= znPGEeaAbt`(HXDI%gELrI&>11*HD$)*MjW-0mL}7E&Dr~rV(=N6E}3Th&_A-tXlN4 zF}&I4&a~_R?>NStRNq+=+_1U?SWK50NKZ%wzk1&Lz3TYi*Ar?>vHb31Xt7vGkG>mfe|G01b6#(Bh z3GkIoH^pQLT;TM94PZLVO<1w`wCTWy^EV7rr)Si(i@b=K*BWfc^jy8z}DW4}{rj{>hF9*ifB|L5lv z&MAqzlJ@fA3c$|Y#AX2Hii`zc>wxD_e$6i?)qRT&4U-608|MzV%T9t~iCqtovxdQN zu`fU|-`wk1HWlu*0PcTy9o!jH;0{05a{@$7@-t}o3Q~o4zh0F8C<0R9e#0Xz9B&pi z7;WmfV16qa2Ddp7{^S+La&V@#ZnTUOTiS(T_O7iwD}CRYIrv1$DH?{PE=VBRUmk%<6nEzm7aIi9bvfdU{2SWS>14Q6q& zv-gB=3*A4E`6514F~GT_9<1QM8t(u1O^c24Uotx`DyjB!w#O=xZQv|VNg^1-k-{le z!JJ2M9v6@OfPh2RkXdY^`T9*Nmy1BT{6s;tc=h}!_Xs=VGK+MwhCgP#mQp*g0+rMK z{sV52sj9c*<#5iF_2`G@9Cb--0IOd2$Yiqj!?$m`C*gbvOl0iOberVv3klC9w}DRg z37)X@TW)U@W;aOxkJIYQHSxn?h9NwWF_~Ow4aAa&mUS77>W< z6qb0KrQWWQ(H3=(QlXtE!i9&kPIvKE``IC}fnW>6LVRuGl(a(p)v_b#d0~bqoXgY&Kb^fRTzN9@H1ws+< z1!QN^5NR#F-Q>a<9}3HL7E9w=tfM1{B*oR4BUq8!H2i(Ct^#0$b z)M)!91JlKwl(Pu=PPA?I<4JJ=xMxEoIhS>zfFz@(BnlktJ_v!SJC>hTS=rVA%{G@E zhRypI3$F@tl8Sm#u2r2jn+@_mnp=uVHJ)ja+TJ_i8&J89`a&>*)y=?a)=X^+m8fsG zufZqri$|@ah1n{V1oni)R0N6I+0g3rZ(GKViS-mNzseq(%v@#KXKC)vZk| z6a!VaZ?vlc{|+)QVS?64XtZ<-A51tC4NK1EP$b1;$s-d9m8VOCG=eerDUw1G3kG@H zOJ`4zGoScjRZA^$y?6y>Aob~oBVApvaJODRs{!uO)4LY<&J6#lHlPoKRq>T4#}AxM z>(wl%tNR))yQ`W+YM&FSSDztuOV7VR>K_a)qAl#%h}_CW1O)SR9?G+VSVk}+qa@lP zt|OsDK;lO{TECv~ZlM?b6980~l(ZwzZD%T_41Cnzo@@6{v4-BcayQwz*3K57vYh^< z{VLZybMU7y%TR|h00~;n0q7J@qLZr}w0K`uKWqymTeS3RT1}wEoN`A#UyzqHC{oO~ zU&IRDuU(sBC-=&1?TaXBi>UUkTsBg+-CA z1rzA>_68JH()rHVECh^mn&7(%Y<{SZsmz+#SVD<9k`{TeqJ_UX^3A%7X;prPKaF_l ztVZzo2N=&JYW+o{n6!I4NB9aj`EV3pKVg4z#*|LZH|V=~B0u-g1(lcYWAQh}x}C1u 
z#MSwpw(chTIw;Z!L5&I?EoRDkxg+7B!q~P0<*kwYh$GJI;iVD()MUxP-%VJ(+^t}t znQNGiKembOw95SYkhw_OX9Qz^W2FyRG!Sj5u43+|28?(iR+@t&qO!pV3k*bhK@KY6 zH%XDvhF@A}{9T>|p8%yz7EdsSAe-1lCnkMal2wHw5uPtAJ>x~FCPAkWV7uO&lJh!a zv()u#zvEPJraJ?MtX6J_-_Y3$rhl&3HG;DujmwmJU0O{k?wa8EJy|iK(phUh5)jiEtkF9(v9Cw9S+DHkPp?i%2tc4v~E zaYNg~S)4F(w8ISwc!9t<%nQ#EnDiP1rfXdM2~5Pe8az&A)9*``yU9*2RXqTJQx%!` zLrCCI(tlgE6}-Yq(kls+ACRK{8;;d1{lALz?z83~=6aT!tQ@JtCYQ>8{7?$@`ZkDW9Ilx)-20=bvW_ zW4@Qvo~?b)ZigvNk7|-y2kJ9NceAY^AQ9Msfi^}1XLj2Q+6%{Gf(r#g+_2smZT%Wd z9yFTlutc^~AV%saeY?*JyKJK{5^ZHnjkea6abwMv=+8>~@kOR34Jpb2FH%-&+7W5L zo(pnh@nMj4YkMXV;j%vVR^(76^fylmzu`F>7zPE2aIL|*+fKF&b$FlJAK&KASTtv>>&EuifVTRJ5~Rq^xv+v@pNnkQP9OxSBq_U()Cv#>knPNO z8eISv_P#FL^gQzf@~a>*_+X|#$6Y!9or@r~`q%$_eAOAAEnWw`ytr84Dy$%kAz?j` z9ZE&${8dM3(Jvx{y}jCZS2FZ{tO6>6FlR!;;SZtmDy?78mqKM&X*-2_rFM0O4_-0BGi`fe-7kIU5@iL z>{Nqf-e^z=WMNR64)0XT&vmEj&A}P!E)pN(W$omaaaoI|l-U^w$r(5dF;(OUS7u+k zA6KrTFI=yBT1l<*OG}fGlUZe29@3Dss%dh^hYES@7gasX808v>-_XhJS%(Mdtxp)R zoS!S{lncHlNl2BoxY}9H{6T&PZLn8IE3SxoJmD{UG=7&$+K0gJVT02KOKxx~FTU75o;?skh$XIMb4f~|@HW1g~yWzh_vI1S^TJZJy6DV>XcI8OJr&79Upfa1T#VH###c zsyQBv&CWDaqO73?OsD-wILkU&o0(8=WyM`hpeD=H*P$TxW?==moFb9wiqR!Rj7tEB z$N@k$iyg@!me$=sDRw_e^nwQBP=NxKk87Ry4hb^^y7%l3QR!ZldVKc+=0@0FL9e;%Nz5~e|LKRnWUD_v6_ z!apIO9wEW>Mu<6m%!E};TUmrsQj?E1odNrx9N8p^wpMoEOlHjuOvFcQYx`K%a+wX8 zi5;feD-YK*8I+>~{pir~GOi{7{Ju-%2bSFNSETDNmhS`|y)WGziAlpEB=dl@NHKuV z^8s&X!rMW}&AGK?`xVbrEQ;)n^a(F>uKz6|TxlOY!;js!)Vx@b6NFNZ-=GkTzMN=g zTou~Xi+nF8Prw&|6=LEc#ygwYvqii?-Zw>FR?#9Vj=XvDi!o|VvfXBEs|P8EgX-4u zxt%79fsmNy9pcG1j zDTNEM(F5&^)|5gEx~j@=K82qv+R|v#wUbz>^tMr|_;VZTp0;6CNoS5Whv_Psp9*eO zf=)cX9YiE;k-|gmliy)Ti5!Tz|8*iWX_48`x7H8WOdIV`ck(#%hI_uvaS9L!Y^1bHvn|78^eQ;X1I{;PoL2wvY#&{@(OTKDYB#*pGo?_asJ4R32E7I^tzP=Jxch8JJLiQ4hh zKHsxPklLbc1oLy5wMG^0R!=AQM&uk-`c7o2?tS8%t{pcG44I5VkesL`Xa;B&rfpBu z^LFjKfx>GmIP#H>;|)IL3=Oe%keaG%_(qo-f`1a9-NlR>gKMq(%rl8d>}p-p9>nQF zIoxvlre%?C><%QdbC9Q5Cr$jwA)up~x`EBsr}qr{eu5GCJ_JZ#=Jx%S!v|?PsV>%; 
zQp`le9b&~6fo=08vTd})=X=Cmfb0>(#%G9IK?Pg}!k6A#T7Kd<-biS!+k5DO@+ + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_arithmetic< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html new file mode 100644 index 0000000000..a97af770ea --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_arithmetic< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_arithmetic< T > Struct Template Reference
    +
    +
    + +

    std::is_arithmetic +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_arithmetic< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png b/docs/generated-html/structcutlass_1_1platform_1_1is__arithmetic.png new file mode 100644 index 0000000000000000000000000000000000000000..40005ff39f1033a2815dc3c335875ae186c9781a GIT binary patch literal 1423 zcmcgsdo+{@6#qn9#O6`LR>D?PL*<5iR~W_Gmy_V?a%?!CYJJNJ)!?){ycoA z0DvCoMWz72Dy$m&uhmpXvroRK)XBvEm%zO$l}Zg~=EqAPrCd~_uPzh{sa2`L>T)fE z;=2z}H?5esBhVfIR&NE#djeB6WLF0%Z|(F~hk6DrsjfF_H;=!sV*Od7yJqO&uBcda zt;e0w!Wt3rFe|w;xgDmVMP5C+baT)HUUfSMZ3^n##i~iX5b=WZxmlV}Pa6g_z>FT} zJlZ)x1(Bp8Dd_(>a-dylr#B*ChlXUJ@wCg6M^2!iE!o~Z+MX7vJ4H%WSMTURYI6kZ zGvD*L#9z8t^r#BaK*TrjV!N@<}p zQ&f=K(npvqb`#Tfw93W2cZ3Q#TbjIX|BV8qNiemC)6$d7`PiT7xuHv(h7u*-n)dG4)5w{weQ6|ht*XOp+bCT(f^ki2C z%2tGCHViH%>WLNx|s^e@d9L! zo!7t934&DpKODjh75_WH##`ZS_JLGdK zbi;jmPNp5)2XE+=#DhAE$Pi*~4%*sT8N#0?7|$~+u)RSiUnms$+^*6QY`}h7GYq&X zmQcTY2vcmW+!R~AaTh4h44a&LnskznIGCo(9?w?_Zm#H1j-eWX_V9*CNUcyx79k&P z0cuFda4veav%`P##f}x;1Wax;C;<~d)2 z{Pd_q+uMSGrdW4ZQrHi}F9#9cK8=n;5TLaBP|a~=Wp)tO{*5SMeTw1^(mjK7#Bwg{{9j9gus#~Y?a;XWIzCjjpCCG*@O|M(Z!Xn`64 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html new file mode 100644 index 0000000000..249c1c9192 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html new file mode 100644 index 0000000000..dedb3f49f1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of< BaseT, DerivedT > Struct Template Reference
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of.png new file mode 100644 index 0000000000000000000000000000000000000000..00a503695bf832a920a342a2ffd2e26a03236d0f GIT binary patch literal 2206 zcmd5;X;2eL7G?$9F(a~yU{o}NW&q`gC&zGP1sROt$Z{zmm{ATF0Z|di&B|C4QYc42 zbmWLGkcfiA6xrxyQ=&9y5D=OGm(725efQX=o0E59`q{_t)^rsnxM)s+8 zcXywy$3z*sUtA9h2{$n+t(TGK5l4(eF0hj)A~1iDi{(*c9X0`z{k*txUDvefH2jG* zz$JB!Z|&_rl>&>I;18o`+gJ&)qwLz*w$hB)(FWEmE~*w5My&?JU{or2rka1+2w%fa zQ_Jt4b}~W<_yFwMrGrc(tZQ)}J!<-wGUGj^skGS%HX+-pUP;0>KXY?~(FxxT!(Pqz zH5Kay-zjzEjh!dAbgrT-Vd^~Rx;n6@dP%U*5B+iS7}+6UdF2*kq|@a?oT_*_Q~)mS>=W~bL{Hd#U6QRngA~bs%;_%xVn!1{b>;Yq+7c6P~ zkvpQ#Vb)fce%<#se0br|_0~p1HTgEW z-=3_U>rdO_Id(~tabz%y6E<({172z0HXg72aJ5Y1 z1=pMIRMZ|5UdW1TX@%%n`C}9ZEQ%?W)JlVpWi9GJf9USUYYRpBj{FEfri&7|a@X;S_9 z7r6G++@Xsd1?8+~%=#@`Wp+#PQ%ouLpcWXvpZT_u&{%s%|;_fFXBgj9S5Y@(e4O4NAB_ELN9(BxPGTbJ=@aa zyN|hi$(HWL`dL%WrWu&xr4-=Cvs6{v5s+G)Eg2zJ1bq~5Zu9igIE;FTMjcx4!2Zj0 zxc=x)C4}Q_Ya;y%2}&%=q-PDkOxD;!{|~9 ze!lFrN;mJx9X#r|zBgo(|{ZHZP%&M?ZQO3;(K}Iovp$uPR$y z@K2J1#^@~k>x=K2`33h&i^GPpUXANhUY|6Ey~dI4Bb$C2gQ2{pLUdZ zN|&)jAhL629!dA>U72^w+$&=DxyFe(;%{3smJ0`C29E0|xGJv2*SI*~-0KAq&-LNp z_wCReyf)MCOln%gPW+qe$0#6ryAL!I9$#+I2K5CNPG3z8Z%w$iXZi^=-~S~JNAwxY za#Lcy@eW>a2e-snwqKslv**5FInhTRsF&#McQ!!hq~c?4saaNO=AmIdlZ%B>R6>bA z6{b!noIe9o`;r?Ez&N?kZ;HUnn-ZPo#<4@1lT<>v`05?7AzCj6R0bdg2@Ceivcq@5 zUC;H)YnY$nei68#@2C`&2G`d^mJ7SBf~Xdaw~D93U(_t>=&u3#{B^6kPzX%#+?6!& z5i~Wrd5HF5G?C!-m@_pHO6?lxLiM0aqdsMjPpJd+;D1Uk0m%Q%RbdbQd;>C_jWEK7 zi#7b54d#II9}R1D!cXkp=`R7>9j_#wE00V*abtt|OL&dBIbgeoXQa(`M}%}Z0=aG& z9-g!=3<>@$Jbvf@S18SkE=@SVYj948F0FCqHIx{!@q*ELIHp}FL4qefM9jdMax;e+2q^$m9MQYTPzTI^$;=D z1%ZAy47={w5S&Wiv8#8SgLV;d?HaX0MRGFuXcq;%G4}HDg_Y(J(VJdo-rApJ4Q&~> zC);KdMA!B)gO&T4sXL6uX~xMzP#Tp?GR>3SCvQH|a!;C=8>}5X725BWcnqMR-^KDA z_#&`d*&ItXQG$>{gKEqrl(OA!+jg-J+Yn}otD2TN=WZRpDJS29Cl?N=S43GW=ab~A zi^G@^qj&2I1l43L_9mDXOaU`>F$3gk_SPPs6ySx0*Yx7fL$qdsJu^mG@o?m*EQ8Xk 
zdC1RE{1SxJh&!;*$MP!vF7_duf@R~)vbz9zG2=pmCD%QFxQs$Z{yzEU%D3?3k87MN zD1>>tbZ_igZ&s`DTgqHDJK3{HnB!B1W~EPD{G&N%)2G9l?ISwL->u0f;@}1MTZpYc zpk#^)+te(Ixb^`)?|kmn=va + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html new file mode 100644 index 0000000000..023363b0f2 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper.html @@ -0,0 +1,264 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of_helper< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT > Struct Template Reference
    +
    +
    + +

    Helper for std::is_base_of. +

    + +

    #include <platform.h>

    + + + + +

    +Classes

    struct  dummy
     
    + + + + + +

    +Public Types

    typedef char(& yes)[1]
     
    typedef char(& no)[2]
     
    + + + + + + +

    +Static Public Member Functions

    template<typename T >
    static CUTLASS_HOST_DEVICE yes check (DerivedT *, T)
     
    static CUTLASS_HOST_DEVICE no check (BaseT *, int)
     
    + + + +

    +Static Public Attributes

    static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
     
    +

    Member Typedef Documentation

    + +

    ◆ no

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& cutlass::platform::is_base_of_helper< BaseT, DerivedT >::no)[2]
    +
    + +
    +
    + +

    ◆ yes

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& cutlass::platform::is_base_of_helper< BaseT, DerivedT >::yes)[1]
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ check() [1/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE yes cutlass::platform::is_base_of_helper< BaseT, DerivedT >::check (DerivedT * ,
     
    )
    +
    +static
    +
    + +
    +
    + +

    ◆ check() [2/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE no cutlass::platform::is_base_of_helper< BaseT, DerivedT >::check (BaseT * ,
    int  
    )
    +
    +static
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + +
    const bool cutlass::platform::is_base_of_helper< BaseT, DerivedT >::value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html new file mode 100644 index 0000000000..681dfbf0ff --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html new file mode 100644 index 0000000000..99556de73f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__base__of__helper_1_1dummy.html @@ -0,0 +1,146 @@ + + + + + + + +Cutlass: cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator B* () const
     
    CUTLASS_HOST_DEVICE operator D* ()
     
    +

    Member Function Documentation

    + +

    ◆ operator B*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator B* () const
    +
    + +
    +
    + +

    ◆ operator D*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE cutlass::platform::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator D* ()
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html new file mode 100644 index 0000000000..d9fc909147 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_floating_point< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html new file mode 100644 index 0000000000..a4612f1743 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_floating_point< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_floating_point< T > Struct Template Reference
    +
    +
    + +

    std::is_floating_point +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_floating_point< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png b/docs/generated-html/structcutlass_1_1platform_1_1is__floating__point.png new file mode 100644 index 0000000000000000000000000000000000000000..f1bc33cd0e2f8dce193e9d0b6c8cb63e4e73bbce GIT binary patch literal 1768 zcmd6oeK^x=7{?cNyp&ugIwi?bnr%7M21Y>+445yto}IXk8_>=KF@XCpXd8r&vV`Pb^r0bp9D7-q?YC; zO$Y>{1voiiArMuVQoE_EDrt@}J)+z;x;eW$E-x=D<>cp~{08z(rTQ9mb#-2)k)Fz< zItlB7gD4+;Ssl&pa0ui(3gBSx9=T$ydknkHmK!T>;cx~QwdYq&x0iQmXy`=7)i%mn zp-E;fk5Xi!a=cF|zRNSTt9*S|IbNq6x&zE}0=EL+px6%~-~a%s5`z{)rBLdMF&Pu8-vy?PoHOJM5O+`J>OuAH5INWNll>on(0TZB0xHUQe+Vn{HMLJ6J!eBto8|8r zn+slC`eZuZaz)BNjtuJRnW8?8S~kiVk=tH>@slLnDu7R#rY8q`!N*7k4#_yLnwb6V z!B)*fXaDFVHE$oF)ZTuIV4gnterlau*2fWc61jHuC-nH1`_Zf$O};A33-mX}-JS)T z{T_t{b#%}P1l-|W>362c(W36Is}hS|Kdb7NkoRf=HAaNpI0KHzC<*u&HtrZ8&OZ~K z|7sR#mpL?^$OoE8VI1qFEOuqe;1L*S>${N2!e{k)u=Js_Wx%Q5ujTUEyUxi3&CxiN zXBHc#i1;vNG|=(M&?J^!YoW$1XsEymvl)Y7%{jwp*vJ1NuGM(y1+Z_{KB(Q zYK_~Q#O0uRf?D5-gnc$D6gk>H+J;7sPRktQx}=C+jvWdu-(yQlSJ`xpgsJbK?`O#G z=f>B!rwwyjMo1a*U!sSWM*DAdTV`(dZ4P~ZvHdwit87ni2|X@iOM!hwKQD6T@&bf^ z^G5sI`{$dQhg_#_WB1}8a-hJLGv8A{dhCWO6e!;;xCqu&yA__Yx+ zt#-NKnf50M^NQ_&Uuz~m75}*=k^g=nkck12dLGjl>N1Z#PYD!SvW)cd~6#P17XjaspOE*RAe~ zT{Ozr?eGdD3Z~@h9tSKuMn%)g%~VM*+@>!)`c?n07@hdWd#86u@!4YDJ>DDRead!J zl;cL(fpXUTo0Dcl1nozBwNPGgfzl{u5C;VmZ9J{BlCm(Ry2{3hz4Fe-)uvQd(O`j}T3Ooe!z&e z#?czMOOfWGjCVMH7@iUD{ZeixYm|I2iRpnPrlom=R}Al$C1W+{F2|x0!?Vy) z_~``H0++$gGxWEdvE~p-CpLY}uKE~vv{rG5C1C^pPketJ$KFld6T~c2bO?|7MYK(b zTnRIFT1D@))L73&J|ZnwtZB>!X}}KOBz2Qs@-0+v%{2D58~>= z#fHTB^jm9B75QA7xaY32O84P^u@3_)=6ODT + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_fundamental< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html new file mode 100644 index 0000000000..0fbbe471b4 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_fundamental< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_fundamental< T > Struct Template Reference
    +
    +
    + +

    std::is_fundamental +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_fundamental< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png b/docs/generated-html/structcutlass_1_1platform_1_1is__fundamental.png new file mode 100644 index 0000000000000000000000000000000000000000..310dffc3f163ba88ae515dbd0859e426af54b04e GIT binary patch literal 1831 zcmcgreK?y}8-J3tW~mfgQnd9E()Lv=W0#IJw0p344AUTOsa9#xcA{uZ=Oe0bR-xYN zLzz(dMSV4uir7fW9{T_G-+Qj>oZo%!kKc8l>pnk@pDz`M z-HrtS00()x(*OWNL~K7@401Z(%XC7(*v~t_L#x#yrcyn9ua%jDSQ}d^l^(54q$5|| zi!@(<07+V3{qg_@0MHRb?q38Xq7=6k@Q{U`hdwXAS!?wU56Gjqlk#b<3xx!Spob0G}pxdRDYyZ;-5@FJVi12FV0*;%h;~4d38r5G)b* zHIj!f9yW7>MFRgfqrH=vsUnikF(o-4f5JOEk2llXDOTm0M7Ho1RuHcX%_^8(9Z2n^ zx_I?|bi0)6mcx&UZJw`Q$(nnq(%^sGImV)Q&;oDUxWf_2&dPp?%g}vNcCzd)DMU3- zh-3T}g5}s%fcwudhFfBy$LyO{la9BYSy&A=T&fM#8adv76pfqT8mtXDG?{`csr~(j z*GtcAaX0@IH10}yc3rSlrD=Ux_q>!(w(Y61bxyu&WM6cu=f-~^rlc>NS5tEyV&30# zlIp@Ocr}uo)Y_zWm8`jg9>*0AZ*^aI2S0gBar0*{<`z=C&hAuMXICoxu77h}xVJCz z2u`_r#67jPUM5O zws&~nbqLE2&Jf#*=nQdgSg}cR^Yj%nyP+-(5@K+kb ze!>&Kw%N7%(v*5dRK^rnk1YF(wD;Jt}Vv-MPH6L#nX9j z=-{Cc6c3*_Q|CHttJv37J)TnBJ}Ag&i%rRSa&4^p@MS88dm(s7)s@FVA1Cti^%I5p zWC#1vD09D)xF-2S+1k!@bwb^kQsz_i=HbL%Gi{{_c54MlI?IKI;YGqolou>A8EPP{ zQo*?#zP<`|eQIg#l4uXX7P2$EsXfB>-mBj>-D-r;U_+wTVQfT=h(rw$ z(Og3$nt7DCp568Sc;zT(+@Z(i0{ zOjd-5pPHEin3_xiT1h_X*OA33`da%VNPmhpzGF()m3}mYlf-J9AF!z_gDupc-zGHm zahJ`vEU~6SB7zg5DxC!TXBQSE&gZN)&nh8|1mURx%juro7WO+-*4*eG`of2Uk2PDK zZqMpo_>6a09GD8|yf8tSu}MWMne*pjZOa!4&|yJYet{}NrXc*(-?ORTZO4b?pg_;y_A)HT zu&Z>=46W1=8l({lu-CAh_V37(9B@$PP+A}R^5Ux~-tm~nD?s9An@ypd`~Cfjd3wc= zX$r-Z`7M6tZOzxur;Mz^xiXs;Cf)nNw!*-wX|vZKr$zk5N1bTLq7o1I?v++c*Nsun z*(%iVLF7RP!`XnubPwUhk + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html new file mode 100644 index 0000000000..a8218637a0 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< T > Struct Template Reference
    +
    +
    + +

    std::is_integral +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > +cutlass::platform::is_integral< const T > +cutlass::platform::is_integral< const volatile T > +cutlass::platform::is_integral< volatile T > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral.png new file mode 100644 index 0000000000000000000000000000000000000000..0c646de0dd3a3ec7e45e63517bf5caabcea0eb2b GIT binary patch literal 2524 zcmcgud010d7Dp(>vXnX?0#+DlAhIP$RY1Uqpzs0&${vw`0s$cgg|H)v9dJXDM>Mje zz?cvh6f_nj1QMl6BPa?YOB#`aLMlswi~(bk-Z1@V{+RwV^Ywh+dv|%ich9-!{LVf1 zbTH1>K-XMXLqo#=^79GN(9oiQ{;Abk;C=Z6J_|Ia!2w~rRVo$eO8zz7@F4R7=&d|_ zK0oSqP6QaPri9=^H2_F0gO9_UH8j3Rg?zlia#qcXW+QI&n)r+(ADX^WSzB(6J^tAU zHfJz&q}LB_XusBB^A2siMBQ&%;;i4*tZ%%jS+W#QfP6sJD_m~N?HHO|QiNQS-A_wuN~86lz0s<- zdkK%uBRd^j3CHf#3uUsvQle>~+mVz4@s&zn7Y_@1LP6N}P0g0;#3=0?6PVjMiXa+!E@t96;-{?jW=|r-b^O9cC_C?{$WG!F&NEz&6q6%GsS4%b84B&G ztC>=nbhqth|2g74oU2)eGI#~=mwjxhuSyisx81t(_NW-H*|AVb>FHRor;}f~ck@~Y zyoy*(@Z4yL$+;@iyB?FZjCS;gD2{c81?2L##L;}rMY8fAm875N%`%YC)nRxpx`EXhjVx+n$V6R%|CmMYY zO1BmeR=gWqj)IgySN|j2f5wTD{ictd^t+sc>-e>k+PcP@B>MnqHc1KygcNm4`C~-C zkqIDdz@s|;#xS={!LK#cUk+;kbB$e(1IFooy_5O9ruLV0AY__4z_&Y#w16FT{8hlP zzXHC*uEDWF-RL-0BG4U^9Dd*1+z{|TVyy>5y0b{}-zVlH2jA(hw`nuXb$7o>*sMB| z=jh<9kWvcoyR3LpX(aCCPGj0QCpWJ|hj#t9U%Z}j>9y5E@~{b6kES{gT0f_Byk1CS zuIp6gv}JLhC2CD5UKfJBORg+o!(;3|DU-j2uGQyy<;o}HF%hRFvn3Exa8-J}X7 z^*DNkjRoPH)A=$qs+dQbzC(GBCq8mqy`%Pk2eEU}<;NT9sAg5nEq>&zl{Jb2N=Pu- zO3=uQNow=YR-_PL&J4Ja*XkNC+2ljn^V`|P))!ZBmp9YMT(;rRuZ!hbz!vuMb;?i+ zJNd;=b#FG-`nf3Y<;*5Z+bEV*Gp$s$f}qZ@a8|||UT1Bp>X-OiwYPQ`UYtaA%P;gS z2@#fu=&g23>uhb|2ZhUrJjZ%Y) zKmSaP0$}?se^;XbII8~tvZN*L1xVSTUlUDB#&w=EKneqe>UW$U3?uwY{cRyE?C_ic zC+J;7K@rKe)&i5NFql*iN`kH<6EU)@0t-8aaS-@=CKUm!gzglBB4K}*3jPSRgIXlo zV%)qh+<3%tAuDTd#{uQU;wS>2(1mLFCTJ=WCJ{qz6|el@(lUXw<%-;v{- zAm?#YvGV3qmFFo0K0ba1a-PEXD{A~+p7`jLl2|mskQ@FRw!fN^bhZy9Q0=z)cco+K z8)G^=y2wIvNDrlouprE12)8z%DI@$GjAeGNT-wQg?w#x+R$>tq=K@=DM9(JIbe^T~ ztlAYPAoz=FMtfu49|grs^waYBruifF%y46@`f=az`?}akyo^15>ak$&m5RK_=uDZf$FS zsN3$*XTly~hD}6niG2qh;G0Sx~>?)654w8_0$P&)uvbG`OdI=qIH`^F;g(|FM(% z9*x;7-;X?%+;;`gf=^*OOI~IL!PXa 
xN#EP7WPemb7`DqvAEe9va+|H-&Qz>grIgGW`3y+UfUB4Wv>WGhYggQf-vQ%YyAuEa literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html new file mode 100644 index 0000000000..6de7dfe3b8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html new file mode 100644 index 0000000000..faa0e6d2e8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..62f9f7b91a29cb2f1bdc66da42db56df3ef1f33f GIT binary patch literal 1029 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z&yp%#WAFU@$KBVMUS-vSZ6O@wdcQb ze7yqW;fYF>Z+D-(Hsxxc5-;->_8+G=>3DC_Ic;=M$+J_1^-`}&B{D9%Gv&Cdr~8%( zzhf5|A&gc(+-+c?6 zef`3(&Tm;c^LLcp_-@aWQGJi?V(N<^zTo$>-=(Zme)C*&<$g1B(=~q$|8F_nUQ%oO zeD5#IjIFK5UvV>Q1?@O?{BQqt+tQi44fzCVf)$^vn!PquI=s5%L0ZJ=+Ea;vF=fHeCEv*G%CvKS zxr0k%5>tDr^2#$c&yR)QlYW(El>B}6+_Nb&UX=en`_lBPEW`2Ny^^Q4Tv_+l{BU3C zj{80C?&W@Kg>Njon6;zz%8xQX&!e%oGY(l^6?!`_`0uP$lH0DOpI@nvKV!K{uZj7j z60zx?lj0uqu2gwBBYW=OQ1iz>FPu`%>7DdNOi%6a?$XbS?32E*#T6@i{FeRA7z;oQU*A)V21}$HY%lRNsNZF%xWr3c>UsN4 z&81V8ObJrcVBmC6K(XO@sQLCVb*XI=e&5Y~%zi_0pUiEhh(D*d|9S9#@$Bixe{JH_p@*5{-`Vez3Oas#$TC`YmZZNZ2DcSbWbf~I#o+fLJuhni z!je1saSaY7$z|^&7ONetyL-gycgkIvJhj6v`)}ueyLP2;GHp!0{icrJL1xAH1zO!FymmYVzopr05HS*AOHXW literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html new file mode 100644 index 0000000000..8f7fc5b489 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< const T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html new file mode 100644 index 0000000000..769cba51dc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< const T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< const T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..91f226cb501717d04ad5b461c37505f1ba802d18 GIT binary patch literal 1504 zcmeAS@N?(olHy`uVBq!ia0y~yU{nCIJ2;quWI^h&Rv;x2;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fi>UL#WAFU@$KB#(-!aHV?BL4`|EG} zPt#eN^f_A3soZ^8{LN&lqKnv~32*mbiCVpK>otj0Dw3XE8x%afkgXr$w zBR$=FHck>LdbG~*LW!b}QB2NOyIl`ov&^m9!Y_Db?tJl(yfE=~&%J~19Q!^$`(%-(zbKP3EU1=9j?5T%8e=l^OxtsmnV%4w5)iQYV{{!TeciFMn-|uw94ab*E#x`#ko4)uW^17_8 zhPNxc=cDHRU2MJ5-rKwH&QGeIcI}dGN^kz|86k_3s~IorT@#XhmvlJi#o_Ht^6!)G z-ZDgiVe;>91mu6%S$s^9BN_+TEnhz zu!%uPhl9G*!^bRF@g#_f)&)h@Dw>O1W6pb7CH*Uu(e_k)+@JbW-Shteea@;~Z}KZ^ z|C#;Q$*45uzW#ZsX5QB?^RuVw{;1lv_r~vrulg@H-tK>WR`>WFPvl`iz%TE*mvlx3 zY*=&tuGEqr3;4XmXFSZ5U+d@|tRr}3z2j${e=VjNcRY5*zKQ!9R-OGvY}2vru|8V6 zA7^y2?vC)*$+W1;pFMra<~<9%Ji98AU+q_Xu}x0?OkW-Q)jBn@C10wx%wl=6_%aXO>=F<`E~nQF{nkpPTFZh@kGpG-gP z(`TIXG@Zrv?$Y(5U)CDl7uu2$zhj=hZ=v+f`g_KC2d{5_yY?>ontIRvyLB + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< const volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html new file mode 100644 index 0000000000..44de345ced --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< const volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< const volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< const volatile T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01const_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..c15aa4dba54d6623ea92688882949164fdb28993 GIT binary patch literal 1547 zcmb`Hdo zGE6$G;xgh^BO(wiX_O@f<`)85{m)V}*i@XNCVhSdEBm&^65OZ1hFuy`*I zZ$P^A@r(`lm;(T`0Pl$PNt6*43G?Qwl%aQqiIUyi9i!EZJBzxWHg-kUE`R#;#z54s z$fQ=Xc}yNLqyoI}>gA{aB60iZ$}U+82N%K(HlC1g{7?s~#m-h@N8-QG`L_90Z1OsH zL%tQmR#AOOY%JY$zk89z)&4 zG|`=bloT`Q$o~CAAMv5E@GN#FKvN?D@?W|s$XYE6PrzjgK|1hz%LhCT2xIC47yp(y zEft)C?B^9)VF{7q{vFFA(-fbLbYkk`&G*xWBP3z)u?>vvuj}lV5Pcn>n)$9@UMeVt zr{G5hBlhl&k0ahkVh106(@Z+$wEDb!FQA^WX=)h;JH_gM)<(HFidegV6-9bDskLt#Mv&FjsKPk* zK^%<$eFYEA7uDMdsM*J`53 z+injD1gqCF(MG5_xvX7Fh~4`9y|F)vgFrEEr|W2Zs0aAEk8}u$-#dwN1(o(=iDmN& zUJ@xVuFEQv0dzCzhtk7I0d+gv3d zVs`vfwH>xzu!MRv&LpXu7c4nzvA!82nz9dfFL|+?zqlJRNIjKqP-oC%R$3Z*QjzFF z*@KdZcW|u>dHqq=@L6{PX5e*u&0w;#cOG=>nvJRgPElhsxKbAi?`qQSo74F%7ot19 z(+2ZgI?d#8R^Ov@KR9tNX-@VlTcKL@!%0?IM&~jD4#x+OSy7Fbh+9)E10jLn|8}~~ z!;7k-_+Ua@8%CduD>t;MaS3E d)Z_FWxwPiEVx>L}y!7V=@J=3%)eb@Qe*tC;s!sp_ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html new file mode 100644 index 0000000000..28ebf9d1a5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html new file mode 100644 index 0000000000..fe037e174f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< int > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< int >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01int_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..2817eea5e5f3b1ac8320b32a33e5378942023597 GIT binary patch literal 1013 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z}({L;uuoF_;zk?(PISx*4gf%@9Lj? zWfx}zaNhgB=f3`n2~_I>q*Ivt|h*ym$>k055H0hhi3cA_z@Q8;rH zZ>rapm8FNzY87nT{5~yGbG4~y>zvZfTV6eW)n9gpC);wFjM&jk^W1rp=f;}feI8=9 zj;U)&=iOZsL$1|4KNfzE`&C-%v3KUuvrigaeE)6s;cP7#hTp$?B~N9ooO?UJO*XrH zzhr-3fuF6~2A{>(3-~iX{+%q^w^aHaHN9%2v$lU;@Z>#-^Cp>Gy6bT% zFh0u6`k61E%1c(8=Wdh!)nDpWsYJ$l zd61ZpY&idrTY)iRVjhD|e7q+60T4sn6v&v6$FR-Am|x8zir#b#byl_^M1~4Wn)-w`F0ualK+PPznpF_ zd1v^2rC3M!jHRbHne<=X(IFAlYtJb2Skt_D(Z2<~=exh;J=1v|`h3r^XNGB8%iK>d z4?p33M@Kt#>#}7Nob#?_uL?h{>|Or3!o=t9PRrcqs(HVx=Xf38`;p1#Ox~Gyw%1p9 zev~ZTyn7~(@ZY@sH=}~B^B&b1>)%e?FM0P*_un)dtwWYyB|aFviF;|g|{BGU;_hnw*?RUq#`_n}Z%>FUAbh9e&gVH_Mc@Ml- ty~|K>iMK%=LOp-9Y)a6iAg`G}nV&UTX|I#_{Q}IR44$rjF6*2UngITt + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html new file mode 100644 index 0000000000..2643071c14 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..01b1f205d0c217d7e9d9ae6b706978d879a379ab GIT binary patch literal 1017 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z}(^K;uuoF_;&8iqSXchtkLeFKmYsh zi(|RPCn57&&Chpc8ONEPCEKQj7oD>_VfmbMlM3r36M;OHlfp=N^Fy_}dY-oyx5d1_ zl;Gq+e2e-e8wefdjUO69*l zN>|V3l~2EaVWA3d`2QV$Kh9dduITb{y+!&)J6=|W$WNJe{FbxFB$Er@PE_U`UOv^b zG~?B&fQ;bRZL>8CwrqZ%7OQ!AmXy=EU6Gr!_P<&zYkoH>&%3Pq(U#dp=MJ7*H~Urb z468gg6;I~fpWT9HRX#a(evkN>*(ZDM=9`&K&Rfv`ZT8{pRhA6jzDr-8nz?fB?fhM` z+2#9R_V+!T6UDZHZ}GW@QbCViTdJy-uiYzoZe>))=Q&x|Lso7`leb;YR(aP#b>;G1 z6PGOB;Wg!?6gC4>fu6Pf0k+4d?Zgil?WuX_|JVMm z&E2>B1;w@>PqXZ}dw=;M-Ti;#UadQLefQ~bPyK~YHydxx)A!E19q`iO_WXlqKP~cR zv^d_m=h8$-#0Tv>e&_ex1$(8;*PWVm@7%G_tj~*H`^d9?m@qld%Jr+`%lYCj@AiZ~ z3w^$4+soWDTgskK`CPW|H8iUEcFw#W^7rL*tE{3d literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html new file mode 100644 index 0000000000..82a054faef --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html new file mode 100644 index 0000000000..a3f5c11d5f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< long long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< long long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01long_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..f3245da6cbb1ab28c9d0b47cb07ff1915a9fd926 GIT binary patch literal 1037 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z&yv(#WAFU@$KB(MUNE(SZ6zj{`)`q zy0}iByGPNUttYQdx!R}1%e;lXLiPEa<~hal6ug5JJeeQ5O|n78)_Ip6Pg){s`lN39 zG~Hhjo}1>pxXQxIpEK#(!NC|X zHEz|a+k9iJa(=&O+hV)EIm5_AbN9;azU4;K1-E^kawTr|Y@x9KGwNSGb)L0v-sifX zky~yZI{r$UZOW2|ujIGe$9=w%ns@Am>laTp^Sd3dnlF8xQO)5wiS3r4Wq^4h0nsoOR5c58l?d82bTWWMvu z9b6uh*rt^#Yo3+a`Ely^#$7Yd-1wW8Zv0ZMYRmmOalvO9H{8oV6R>P)S?>OqWow?+ zU6z+UYx#B70khty!=_7)Trc+WdUwmscuuhC#dnrh*R7nCm|gpAj?|OV&r_Dnd*wFi z+#@5;OVTw4Q&raPymr&R^zwZEtubwD11H%XI=bZlw!D9nm_2_nyx%!-$^ZEU%AWI( z@$A`iiusDpRkA#I{()71!KX!<;q>+J6O08QhOh4lMuQgV2HQhC4eECoDh}~dmwMj5 z(^GS)!qO>f9t?~J9Ck2%0NUygq0Y0KZ$BWs5ts%9re==Eq49=_tX~F`|tDOmsWD>&Ua0=5p2^Muh>7EU-fgsDdT6BTUg#+-BB31?OCpAUeIjuy9s`KV!W+NE^DshKlNje z$NRK1M(eGfZ_T}T%qP1%ujt381LsQ3Z+p$Wc157*)Y(1DUakLqD(Ly=sw=)Pt@Eap z>Fr**v;9)x{t6~GH}kaba@kjw{E#f&e8*>3r`%if>g=qC%kJhMOMSg<%D$5yPv^fe z{~LI5PH;5)`o!yA>lN2CCt5F8@V$O-meKPNUmlb2YLQ&ayOySVSt1Io1QJdz|HQzv zXJXaOC+0gVb9P&^+P$00z^*ep=EV2Uuhaac4)C2;d;Yoh9PfeuuBgFy*4ImuRdcDy bC-%M5tvZh`*)SKFcNsig{an^LB{Ts57c%iv literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html new file mode 100644 index 0000000000..3dc681ff1c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html new file mode 100644 index 0000000000..119f69bf53 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< short > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< short >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01short_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..964e45f8aaf816d65a7091511b64566f017b9b8b GIT binary patch literal 1024 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z})ZY;uuoF_;&8wqR(siT*Yr2@BQyx z_t~*$!uD8o|Ds#-E?&_tT`90Z>4E;~O*%(6>6~U0^VCw&49rtG8H|KCKVEiM&eJNX zCFcIE#D2@LNjmCorj9EQs&)1*cdJ}HyZG^6U-P%$_TJyQ@4olLX{vSK?kNBGRrd46 z-Rg}0C%;wY_}pRddF(G7b^ExAQ`>~1sH!)+Cht6Nur4^LCd}M6=Z{f+&++z_jqhI9 z{jhwowe9&UZ{e(ozuZ|` zaqmpH`;O&u+!50avnpD?{3!JIJZt+p=aQ|d(A#^#h5lc8vfpq19x};%ZSbVDHfc|* z?$aus`X7#kOe#53-28XN?4NZKPbc-cc~)`ztNp#X>*oY!&no8iJ0~vrUw^7sr4kwI z`INC#&h8PL+%9(Hv;k)bmHSx*$)H)8RDiu22kXihB3puBIXC0=2WKQpDo*Z zt?OFUZ3kuz2BD{H2ZHk$Y@n3>uAL=T7xRH>K<~NT0luH}i{lx(|1Qt0NuB!N+Fbwo zbNPbez3;0T@7#O8wBWU@y?o^L2d{5GU0nWt;os*AHs$?)Q~TJVmZ{M0v(E7kiCg}$ z70d%hi_0Ya4HgVJup<>l)FCoA3lIdhk#jnA^yy7GH(-mYXe|G4MmZL|62zw_*hfD!Eq zjA-Gw<+fV2polJZxNTEied-m6zu}R!}}>@O1Ta JS?83{1OTyU=nMb= literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html new file mode 100644 index 0000000000..74a51c68b8 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< signed char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html new file mode 100644 index 0000000000..78ff2a0c7f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< signed char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< signed char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< signed char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01signed_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..d8ed29c38b941c518aff4494aeb227145ede672d GIT binary patch literal 1078 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z4O`&tG?AmU7ft++ol64jr((MZ&SU!Emt^a(iD#%g}WY;)RA!d!(}#lo>q%m z@4P>i*lid#Nk_fP)bU}X?IEpeV@Pd!--4v*Y4>4s4M@y z;%s)t-|lZ&IrDdv-S}?LlTm+$asccN_8vCQo{hCJ|U2P~ErF*<+%}3->2B-yU8* zHHXcFS5+k{D|W|Brp0rU&3}8%ycWW-^Ho~@)nixXWOqnjE1F*vAu|2$l}a}2_Pkl= z1FY6DbuBrNc-KSAc>SFpr>dRr`lP+7+kC^Pt#jY4e82T8-!WL+%RdvaY-w5Ue#x>m zhySM8SnVo)W!YFOwYDwVqc{Kgl#n}H`_<+I>t6gc=hgL)vm3VCRr|449=1`K^(b8k-r#)d1yKR$T|&I2yH7&p|^ERkVw2Quzl0WzF-F-A;GW3YM2c3^TIm8tqO zzM4x_mQFEpWb|NQLbXBt&K)MdAK8lEci)!d-=MHh<~CEro70QM9^SpG^+VD;e%;a6 zc?@BHI%faA8OQyg@P8y%|A~K}XD0IRyMNa%>$6nUe?G0(A2)4#YrTs3#c|`lO;2wH zANh8;=2%1crW}X2n`>v*-+JmSHQ(_3$KZ_c=f_fS&i;ONhqvy9b&9U{_s#d0xbv^- zVaCg}fGcuWK2}b==X=Izz18!r`S*?=*_)Xqc0}V&2YQJlp&C*&SEQpI^C`7=G^IoT_)b?iMnn z?h4C31xyTv7ve8lUY$Gb-<((NKYw}DS59D*`E>Ui1GnCif1QDUR?R-XZRd&?ncG=D z#Bi`$oV~wx_EeLG1FZoKn``d#9@y`>i}Aw}X$F4?b^a_QCWL + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html new file mode 100644 index 0000000000..eb0734cd7b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned char > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned char >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..fb350f8007998092304af0a7ab5ef3780d39d58d GIT binary patch literal 1084 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z0` zwOXeIlh>$3`t7;O(#kh`$$Oix)A)B;e&6?h`Rtb^c(AggYK7^o38nr{O|HC{+IVk z83m><8RxWt3&i*b?M?+=;j ze!{bs2?@z&*%lbHx>;wN|Mr{tErg@;>RG#0XIGc%-r=d<>2I0SHT~|DQZ{RE^Rww1 zvf+%bK?#xOlTMv6HsANjF2H=UWc5868MVVs^XHl`{~l7#xaPdH@zl)Kd&}PzK4Grp9+er8{!;3@yW)_l^J_mlHfP9o#t z!p*t2r{&z<=E|_IUA}=OLFq0-L|t7FUxPA`p_c_@DBfj|nV804^OEhrg?0z>PNOtczyY40lS4lon;^EhCL?S4HG}yeVO!Q z!|d(dAM$}t`>1Gs-FvUtfww>7xVBID_t~(7f1h2f-K!5$QUArJmj2$<_clI6{la;5 zojmzS;cI7{%jf4t+?6QM$o`X7uRUd5V8PjTlXr&wlEE2kiyv=#bNF}GouzhF+(*nx zzTbSa)vz^V@N={M44^ zPA4?YKhMkB|6bX9_UFafGfJntSlV;5XZG=3$Bdt6KB9N~|ttnDh8;BSWA5n%T;k4Bq$TI1+wvn=|}3L5)^%^QBWJO$qXP!Y_2$>hSOW S-A{m7ox#)9&t;ucLK6U_fC}ya literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html new file mode 100644 index 0000000000..e334b6af4f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html new file mode 100644 index 0000000000..669a35f45b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned int > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned int >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01int_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..62eb1b3a3680c06969338152f83be973b5bc5c15 GIT binary patch literal 1073 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|zGiVq?VXy4wJdvaUuZI;<8s*^-K%qKnZM8c6fH2c*&)3;3c zy}ZaOsb{Ln>DEZ!fEOD)f1UH*yyxQCtv~rdZVTi78nnZ^8YSCso)%V&Iwu0adpCw{)s?yp`P zbz#et2^lL(AEt5jl%2aPqn>INtoS6-{P>D(>#mhPO0zg!d+K3;&D&+~jBi+$Uao1o zR45vt!s_?hEokP?Q@!i=m|r=Ya_pXY_vw@yFT%f{eQJEwmf`vL-pErWSLW@`ZDx`aViT=C?;D`tPJ&3G!b&dsxm+h6VP&0Rme`BYx=?s?`m>0kY&UX@B@ zoRT;ioJ^?dqH z&81V8ObJrcVBmC6K(XO@)9mO2to6E2{=FFLRtJVqO&MffN6>cW?fyYp#F& zQTbhlH8a}xKY8|V+jXXjntwX2?kWGP&lj-36T_U4YmfU&bL<0S9rn-LusJ!!H^P|z zZtb>8vj)~{XAQNdtP3nS+iqf){7p1CV{P=>(?vY*SAV>5;a;@DwuQUTo1HaV#T)(PHu&y26`yKn1# z`{F&*97V4$LC12m&99wHHP@}ykp12(t{`xC#%=3sD?B%@o9%kHHRt`x*_WXS;g!$a z9WUR{uE;sN*D*22@1j9o$>%e*#&PfNT9^7<6fLM&{w;cY_3{Nixuwg_0Fwf9t@XO^ z4}E;^ew!;*%6O*2oFV+@yNU)@{pEG%XVqOf`*=}|*q82=@p2pnAF5xcsq#LUGM$~t z`S(8QhX0dGm>+nWGt7fhvu6i+d3tH8eqw)e(rWEu2hGjE{LSF$>gTe~DWM4fU{wyg literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html new file mode 100644 index 0000000000..f0de602047 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html new file mode 100644 index 0000000000..57166d8f46 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..e758fe23202e75174d269cef27eb4b082ad6b76f GIT binary patch literal 1080 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|znOMO>%4BuV*u?TSPn?>)g&I%iFy@ zlj(Fh#A%W9TK2qU2Qt#%rrp=PJdaB;{O+5@xAsI`KXi7(^xsnsFWd2E**fDJk-IYO zoL}zX(wM|_de_8|w0*^owcj(B8qeJL{OrL*Bb_h1KR^32@0DJ|*UIE&M!8$FpPwyA z(mg->=_JIWQJZZp)2kdGP%M>jVa$CDIJ1uZN#vEC4ZleNQnOERk+dzQog@ewU%*5-)YB z=hJs;E}gPuN|2fc1E+%miVf*6pM87LvQw<+f0^}NM!pXI;~N>Kd2eQrsI=W3!8X6_ zbD8MBE4&8;<%^2pSJ_L$fMhyxY8W`+hNTIi{YL^xx47OM08K?UsD(sXRNwDSNK;^i=tK;g9vcUX51R zws80PYgfNol+;^z8s)LMu3dcYeD{~Uq-mAcE!Y43V5b%mFYIkm%1`>pf)zFj%wTYv9JuJs&gcJ_tQ`>gZN9DaXp_Q!(v?^fD+haY%2 zuWIhDySA*US?<-R?gZ-^UfBM(LjK#8D|>e>|0T%KcGb8+^nE`|#HX@XubJ!T&OYvF z8UC@viDAyOUkAQ++>bDKzr-*tMt+CPM`3e@|3)v_4ouEtuz^zM=1ZqcniAyogx@R7 W>ePdsC$|H$IfJLGpUXO@geCwsa~7BY literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html new file mode 100644 index 0000000000..4c796bf97c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html new file mode 100644 index 0000000000..8fb6640e3d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned long long > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned long long >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01long_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..3880ac46fcf4f96e278a6b0e1d81977ebe243e78 GIT binary patch literal 1099 zcmeAS@N?(olHy`uVBq!ia0y~yVAKY(12~w0WZ8?w20%(8z$e7@|Ns9$=7+B@mK`dc z0AzvjfddC3HdcfIxf~@ye!&btMIdnXREQA+1M@9U7srqa#hR_GZ<2 zqiDX>0CBTeI1`{`nH;N-{zLyXpWt}dfm4bR_AYRDY*KkcJ`*o2dUwy zbA_Yz!mpNxM=j>^oMbhvR8>>fve$Y=s=j2(_fhrCj+1_)9kcbjzwYvPE2b{l{vqko`xUGCEhjE9|70CL z=}S3B_@pPwF`kq93sgPr4oy}0xnt)ka8#UU_+eMaz|;_YNVK7Q{`?874-{hIN1#0W^dimP7p8k9L z?L3AV7xw2QGq%OdVs$wE?cMg|JvXnO{`zxTDFe@s39G-X=vM#!{(Mm7o~`qvuiX3j z@9nq1L*CavZC|nH_-UbRb}iuzpI=Vf>OQSSwfJ(*=BpRCAC0ZFEZTaWX;P&}&%DCv zr#7G8#w7IBaGhJ^w}R-kyPvEzb^LE~)@=65-)2|7Z(2EFnZWI>=AUK%?pfx|WVL2a z-Sj@2+Xc6muRa&|WW~*t=&zRbH!Wg!-YUO#u;BX9r>}Ofzh1v1b>-(fvu0^8o^Cbm zO4{$Wt84Fhhnp_bUK{ADvS4w!=?>msQgTaXT)(!Se49%DQG(3tW0Q^XaBq#k+lwEXH- zAEg76UN@fZ_;~ty4C5bLw4l#h7v!bsp}ExfALDgvJMSBQZs&kGpTX1B&t;ucLK6VZ CcO7y7 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html new file mode 100644 index 0000000000..c7dbea3fb9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< unsigned short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html new file mode 100644 index 0000000000..3dad4c368c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< unsigned short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< unsigned short > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< unsigned short >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01unsigned_01short_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..213ed400fdbc2ba83adc2724de26423eed7ba044 GIT binary patch literal 1095 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z zuh->z(fwTN<{9@GFRt@(Td1*W&1vo1+j5U?%e~EL=BcHk8JMSXG8hSOF1Vc6uQJ!r zY2E!>nf;bwlXUcZvYLXLeTAgsrJi=@#Yw;a|8m*gd%x$t{{A~?M{4JM+wUCv>UP(B zd1t%je`>X@nc6$%V;`3{eft*ZazX&;(EeEQnboa@^Cmsoy!&P1WBJeXKmM3taIN+p zzrFh2Zy)yj3Uc)II)6tdJ}#qlrgLxbkvS?mXKG3D);^hI-D=|588Pv*#9@Bx&jA+} zPMMOivh?9Iff;Y-?7mrkD$#%Ggz~#@_PWI`etG2Vmg&i-GK2TLU3T91re&#d&9bGB z!yG10v4}OFbZcUI-k(#y5ACv?vGMuYM+P(1e!baKS?S-({ov0Y!=*CYZdE@&Taq+) zp84mig6~!yXZe=dV`}46^=HTOCC}#VomJxB>vFnU>)pzEiSPfuvYqR9?Pce#c`H zGJgIj?e?~{H@4-TX80pl$H3LV{FLp$r_aGf3^G7QQ6P}P^OP;YbtdD7Dbfu7JE=_l zF*0AfHEheZTZ~c;j1x4>8Riu+KhT6wmW9PXAF}2vKl$~xXdAQ63Hx)kj5_(Dj0VR4 z)0D#J?tlI4vFVTey9{f7Oq%_A?mh7XZ-3Wn-cfq=eDQO+n!jZ~U)-(E_-`E8yn4pQ=h~>-*fDpq1xJgI_FH= za~4+^`*_SfRsAgOzGl1aRHwM2;yb~QTkSUWG|Br0pRFv5J2gl5-AZBeoqN>t+@5bK zxg(wLptRd?d3oGowWC#cb*{ypdd5~$e|L_Y+GUsdbIn)VmOfIOXI^>R@A79s-~&~=cN_^nYhS0S@;;c-Z_Q+3SI+#QJUEZR p1{mp3s<`lSRJLpO+SGrH^9!xc94J|_9hmDGJYD@<);T3K0RRtv9z*~D literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html new file mode 100644 index 0000000000..cf6c6e95dd --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_integral< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..771358c7e9 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::is_integral< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_integral< volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_integral< volatile T >:
    +
    +
    + + +cutlass::platform::is_integral< T > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__integral_3_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..774b26f3f250a4058771421bba0229aa17a05dca GIT binary patch literal 1515 zcmeAS@N?(olHy`uVBq!ia0y~yU{nCIJ2;quWI^h&Rv;x2;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fwjuh#WAFU@$KB((;ivzu%5o1{qg^N z8wb68E@4hqCSNRPl^H6Y=!%{AHoWMZBwycSUN2mdkrl7BVwatshV5EEn{~?< zReW1-|9p+=kHXUbGj^X}_&I)kLEl>8^ZaY(-{;BL8_yP)x+2Kz^ZMO=M&ZKi7OP&( z6*Xa>{=Y|m=}pB}&9qtj?`QMAf4xCEVA8H@_txD{D7}8_PS4i!H`X2q3%I{F(&iZR ztO+VB7t2jk-OP7i=gQdzGv}8=ymxOk-I}oqh6(ZdZ%i}2 z^6)jocmIu9?=y{LERU+ZGze4kv^vJE->$$ zuw=D`y2qq{>MaVMMI4Tko(Mog{%~x-IZJlS=Q&ItLhmpzG0YZVF?d$S?abipByeEH z?)e5R21+c9J6hxz+67o12sAOsI0-a3b1)TfI5O}lavV@(VYFybphR-L@RZ_nj^`|I zPf*pIpwbOW1&SV%WSl0bSb`k8!zrN3%=43mV*lzV-(J+`{PvpU`8#>mKhI12KPD`= ze7Nr9>z~K}L|piirY3hiIMC*JaQ(T(ItTk#e?6)v`^9}4fTD|$g`&*N~C|&2&%HOQ5^!l#4 zr|e#ZMa;#|S9yQpx^#Bl6RULr?=2M3cx@ps`i1~*_tO~1fQlUM{M zjt`|xL0+03nyS)5KD#|@-SRna<#GR2lJVI*^I??8 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html new file mode 100644 index 0000000000..f6bd0999e1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer< T > Struct Template Reference
    +
    +
    + +

    std::is_pointer +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer< T >:
    +
    +
    + + +cutlass::platform::is_pointer_helper< remove_cv< T >::type > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer.png new file mode 100644 index 0000000000000000000000000000000000000000..e83115cfa32b6bca98c134279d0af4c0cd96a894 GIT binary patch literal 1686 zcmcJQX;hL~7{_1Ib}=i=U28(eEltw~Q!$rJLXgy4at+gL5l7r`!>vs%%@@>6%?g(+ zm0ZFMw=BlWB6A7Tk|LD|mr#kO8lAV!na-K-GxwZ(@Bewu{qUUo|J~HDhg*b9*QF}u3Dd>SZTP`E0_ z9q$1^hrW-Y$DZZ@utFE(Wba9VO_xQG8QYbX`?(!xUCQU}ykaVUFRQ%w>qDh_y97l8!}2*wyMhX$nYpEZI76jF3foot7#j$dd1Ehf6J z)2_Vjw!p+UaHaieV9Wyz<)F*rmHH$uzXoON75A3fftUCV8#j?-8EOcgaSD5CPv&gdfQbE+4`sJ=w!>5oKx#`TOg$y|VcFF6LaUNOa=bHJ)KS8t@8MVK@Ma6$e#_c~7 zU~)plG8R5>>LWXzAJgkc6I)~FK7%9gf`@+F%YNi?EbrPKCv1%s-NJfu%hM?xW15S5 z5?w7II~6&F`Tl1I=FG9vto1* z9MtyPTAQk6Q4e=^w>=$X8AF+h3i3}aH6#U=P-|z8&F+>+N?NTov?D%^OgS7mV3SnZ z@A#`auZr4`UE;}xrv`GW&$H&G0jB6 zCnOzU;ab{Iu{-#zKJ~^wc<{h|>=6B?I2CPH?xj!*F9sWz+(Mfr^Vs6kFN%cJ-r}9v zq=J@Ek>1uC+Z#m8-N(i>r0qRZkG9|y;KK3AwZ=8tk>1~iH?po3Rd}$%F&`)eg8ML} zC`r-C?uHK-xcfChSCm$6@ZDn*B?!Ks{(d){>`@xcwdY!u0H6JmVe)_p#2Agq{9Pd! ziI79JswwsWXed>z^uJLE@jWqXfH$bzTuJx804xL6T~+Pcr@!LXhHM!0U;v{?_hgOn zZj|LXHEuK;rigOm9a%QqG%W-0Xm3f*q%E z3Kt72xI9Gc@I-eZX|XodFDilH^34JbrG}kj@sp)5<+0Q_UIrGjE9jT!4ys6)yIrp!F2?634oimHLhw4F~tL)L$ z{U?0SBnIx{bZmRV1lm-~iD@_6f)&d@ssc%iZGxUama=VE&-w$yacQcr% hLRHc;= + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer_helper< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html new file mode 100644 index 0000000000..56fdd506bf --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer_helper< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer_helper< T > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (false specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer_helper< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper.png new file mode 100644 index 0000000000000000000000000000000000000000..bd1fb4bc590ff0e78b66a73a6e726acb6e5a43ce GIT binary patch literal 1024 zcmeAS@N?(olHy`uVBq!ia0y~yU{nCI12~w0WFJSwaUdlT;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fw|w)#WAFU@$KB#lOAjFu+8pYRr%lD z&a;LwQL(yc&fS%=S9hh$dU0fM|L|VBRc-6FSI$$HxJ;w+mgXgy&pTAYh8~eYP>-r75k5&ox`(|FacTeqN>WiRRU%$tu&)m)W{oEc`1w*Q|a|MsJ^l=qoU|8`uLv%B`tV4)Ikf9|{e3D;kxnib0$^I2culG;7<>|>q; zr>-T9by4%q*!|fM$S-W=lj>no`kH%_@pj>tn~pYp>n*;K_3hqUd!m9BPUO6` zTY6LOI!meKwidOtpMqRjLG4PPvKD$sacFx4Sr)0CSbwlwD`U;chZ4ow8a4@Mvd*7d za5{R;ZplZ+C+rR1XxUe>-Rxev`{=gNH`$r0cb%^NDC_n-TK$dR;7*e6#V=yUyBG4` zSFfn+E=})0S@2YB@{-dPGgV&l=J}|48s_tS3unFAy;$^IL5$ZW-93}Pybrt3?&5JN zc8|Kpq&iMbk4a3(Vg5lNBzvvr+O4}7{%GuD5Mr1w!ea3JwKgxKkEbI;v0U9Hfdf4v zECQ3N7?^?t92~fqI6NE~7&SQ*6j&KqmMAnd03|0hQ6gD4XF-scrM3>5v+jTs|yUcbDuZ8qlvsd|SCzN%H-3%U*O3vZE#ha`q;|L6H8aq&xR zzO~snW82Y1!B4BB&+z_zB6;O*#B0vo+umNitvBJL*zLZ@?}8eOD$eC?-1ceOQ7*1L z!vi+o<3BTm?^L|`->2|(QQedU*EjAGm2o&B#+&e?ob_)X7t@dJXtB`_4a=wY>dw|l U+?xz;0kbQEr>mdKI;Vst0G>>_qW}N^ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html new file mode 100644 index 0000000000..9a6bacc855 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_pointer_helper< T * > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html new file mode 100644 index 0000000000..1e1fb5ed62 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_pointer_helper< T * > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_pointer_helper< T * > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (true specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_pointer_helper< T * >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__pointer__helper_3_01T_01_5_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..6e07cf628b7ee5a214df5b60eb1413e995738631 GIT binary patch literal 1034 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z&z8_#WAFU@$KBVeUCK+T=~UU-v2+* z{N;^jubob;ao>Aei&y=z+f$}K?jPRgEZHre+c->H;xNhTP_IfQGFHpGR6co$sOpoS zQ>Nu_**GcXro>g2Qdu+4yE{!!_2zv$_Pc(U=l%PCkAD6c%5^{T)qUl1&N@5Wy1wiE z7aqswzwcNpEYE!HvAOWBU#C@8@r0gAd--+srJb2+Ay$18f8UAqm~Zy%YNKAyYbw)bC-NS z-kjRExqnxeh&&_^z zBx;`d+pB?hgP$|4OPwO6r|4d@bNP}dw@hcho2S*e?0d-G<$XNYZ}0vdqP{+Ixyn`% z^GRE}Ppf#=ALw1Fl6f}0`R|I^-|94;UXtydRKhK<_V?zlpWb{bFWLT`=~ek#haAoz zwp?D(Im;84&*d8a^TjhT9&p&fxS^)DRffSG$gsK6BE#UkgK>sn8iUP4wgbX>RHptL znrki|*D?ta&)+!k$fiBB?jRkI!5; z`;E%2*1msc+(~j}y__qvRAN3WwuYZMUC^{EcK)LJ{pWAi^i-C}-m0++H(y=5zwEMx zmhABzaZAcCr%RU4{Vk?;`S%N!*hAiD|R*{LkKPKl7_0MLuDIdO4ByY8LwybgW^RrJ3GR1zq+cWd+%B!58 zZqD1Z@Y|l7#+wWM=RQy0xqMxbZn#Um|J>zyrMV7Lw{};r(%F@gc~|G1=7P-VaN&Sk z&q~=gJZe+uvgpw!V8&(eboFyt=akR{0Q1@VX#fBK literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html new file mode 100644 index 0000000000..3ed687e1bc --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_same< A, B > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same.html new file mode 100644 index 0000000000..bc71a8458b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_same< A, B > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_same< A, B > Struct Template Reference
    +
    +
    + +

    std::is_same (false specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_same< A, B >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same.png b/docs/generated-html/structcutlass_1_1platform_1_1is__same.png new file mode 100644 index 0000000000000000000000000000000000000000..66bdead4f88f3948d8b9c47aa5fda9abdfe8fbf3 GIT binary patch literal 1016 zcmeAS@N?(olHy`uVBq!ia0y~yU{nCI12~w0WFJSwaUdlT;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fw|q&#WAFU@$KA)MX$AZ*rxZ--1lF3 zzKWV#LS$j=bCc(BzI#``6=b@`^e-rTt>@aUzXDV>C#v*|sCY7JdQ4&pnxMkEM8T8m zhG2-?bQRYx6Mn~XM)z!-^rUD(xOzs_Lp8~`ZyWc$`e5bQzU-S~)0w}8j!zRyAJ**` zIl6f7-`vRR4}af%yF6?Ag>UBCIUCIiUtjoU&y!JmpFPoN#+1WalXl1~YSP{? zN8KI&i{*YjZ7-QNBlTb5YSWr^hcg{qXRZ6V^>5R&wbLslUe76vYFxZv=gkcjbNJdO zC}kevJ3Tq`usL_B@q)7f%UpKn)jB>talCOx&c+3$4|As75cw4An7!I=-9Ie>6P?`I zMGMUHab#0VSjSZ=9<@XCUQE@M5@mv&GpuQDR=47Z%sAP_fhr-O1i$f>|TCjgH+BB zs{_}U&$wCL#^dg1=xNn6-E)#(L86vtm*vHq>$#-(`{FDvmuY%VFHn2=KJ3D57mrJ! zbA(kq{|m!{*~4*?$b=>p%?D~Lu0>6b$}VO6p|Ouah+)16%ZBsU!=;#RsIoG?DgHdU zjp3VzBZJ~fb_E4iMwTTC4GkhJ0u!1Tn1Tcx9JrV`fRc=w927`Cc+4_2$ScT8(@vy; z<$yA(C3QM4uKjUnlPlW4Tb7r>`cH~O@a?A(OcrPJIjZ8%p4(h$^LNc|R&lkqmd8Btnnuu?>9#DM# zncB9aC4qnEs=v6CWUZO;;Y$1#!QW=Jj>Xg7DwTxXR8cF_&bZTjPGgtXg?3$tDZKWcyk2b0M>vCED?d9S!pOLt2 z{kH{sRkv+tx166+D$4yUyLy7<##`%d-ZM>o``xntp-6S!#%ChUn|6NlGu%AYdU=HS zmAMt^vIi2+aWGk&s@}|85p&};J9kyhzhKsar&IpT7G)_Ad@siG!GAJp6qq|Moib@k cP}gbsFZI?e7aYQ$0kbKCr>mdKI;Vst0I!O+uK)l5 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html new file mode 100644 index 0000000000..0c4aba4854 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_same< A, A > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html new file mode 100644 index 0000000000..973be03044 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_same< A, A > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_same< A, A > Struct Template Reference
    +
    +
    + +

    std::is_same (true specialization) +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_same< A, A >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__same_3_01A_00_01A_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..cb7e9686e871b002ddbc4dae9b57769222680bba GIT binary patch literal 1003 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z+C0&;uuoF_;&8izSjx@tliBf|Noyf z=Qg{&@v+J|o$$2_ZTPaJG&b*Nnf+2lwcfjR ze?@q1l6i5Jg;&(fGrDrcbm6-)uixkI^4$Ku?)k5}c&_a_Z#O?#ckuf5(~C{?7e3A3 z5}tJX$kM~x{^vPl*UGv~Ug&w#cW>^li935X%w4`@+nPMDh`;G|R~|Xf+V}2r+@CjF zq7EOwYR;qK>HKo_?f&amcAYsZvAe_WfonBv`**faP{~{%JMr_4Hh=44sS74k z6fZ8j#%mt@AS3<$*@T9uHkzcBiCzr|-?&uH|>9RPvRT-3^(4_)?*^ zf~VAtSo2Aj&M2GLJ)OHDPtEw-o|_gvyE^Tx^7-Dce8zOcKL1R>vbEptR9>DNvHQ7i zeW}U!D@Ki_Ql-4tCkVyA*VNp(sV{BE@~lN?t5@9(Ry};{*9%+I6R)3XdfHy?RgpcK zI%$cxh2d0{x1VFu?O!e1FaN4Yvd_)as#SgRzc;&nPGI)5YP^2Obn zO$hQ5R%u{JP`JZT0kl;eLOr)NpT3QK*7XUmZ$JIPe&fN}iwwDy?|0Ylo_${Ta-~*& zUf1&3@$3I?V#s^{eQAOBCA;ZQvOj!%`$^-hcgD}#(UIPlE+u#BayCTO-SM574T*BC z?=yBkWXXLQcI(ofPF=&RfonIL{*IPRcqD2*cTrRz+iTY<^Wbu`upF2za?VN|pA|L+UKCkSOq50=ahP%t=tcl-x z_t5XY(#_Q~N`z}m?&Z8)x%Tt1n%>aM%;xtC?wOPaS;~0FZVa7wT4YysmZj(HYOYHm z+up5wcEs#^i0Q^$?gyOr_B>Wjk#lDNBI|-OG89ZJ6T-G@yGywp{Gv1p3 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html new file mode 100644 index 0000000000..1ba94b3611 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_trivially_copyable< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html new file mode 100644 index 0000000000..f779e4e4ad --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::platform::is_trivially_copyable< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_trivially_copyable< T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_trivially_copyable< T >:
    +
    +
    + + +cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    static const bool value
     
    +

    Detailed Description

    +

    template<typename T>
    +struct cutlass::platform::is_trivially_copyable< T >

    + +

    std::is_trivially_copyable

    +

    This implementation only evaluates true if T is fundamental or pointer

    +

    Without help from partial template specializations provided by the user for a specific class or struct, this trait will never report that the specified class or struct is trivially-copyable ; this is always safe, if possibly sub-optimal.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png b/docs/generated-html/structcutlass_1_1platform_1_1is__trivially__copyable.png new file mode 100644 index 0000000000000000000000000000000000000000..5103120e458a0effe39afd4ac037e585d4ef4b9a GIT binary patch literal 1472 zcmcIkX*8Qz82(hLWhiY&qm5vMj#3?pj?(I43qnMS7)@wO)Gm@xDMy=P#u6Hml+H+x zSX#77RkV#tqa%xH2Vpq24i1`Hrm9+F%Y5e7{OI4g=e+lQ?!C`*?s?CB?)wwU&r?V1 zfEEA%ItVW}Gyv>?f*7Tt3eJ({VM$_oB0N?cy!UNKhd;w-Bwf`> z-IcK`1Iu_*)-`>8P9iD zy~DisV-MMIX2@|N9oHN~T%PB>O74y`ioDGAKw^y-ks&SRh*VyqDA{p};&NmMXD>vi zPrq$n&1_8eI@T``%+LxZz3D4(z=TfXNqU>T4Mr36OAPLj7L`?VH}>-D2>$e$ma@Q zMG>z3ru_VmDfj(jp~TWL6oW!P;ebQlvFY1Tq8LX6RxqLg{vMR+J>$#RB(BD?%!0;w zj_xsy?FxK)5~tm;^)G|jaK|f@qrr@los7fc*EiYqV}25Cu`Ec_g9J^KFP3rr6k~;j zpFIve%M(4Ooq`$vJ&`-KkoNOXCyOwxqe(WuD_gp2?SoHwiMHpjh6-a0$Ih}>howo) z3$PY?Jd%TZdgHq5+=#)cx_j~Rq0aH9X1}ZJPs%RP^2l~8*rK|0LQh=)GV>r&8KS7M z4e>;13D~Cx<4VpkA23U9R&mk@!A!F6aw(#3Ohau7-+(&a@5&;y1#@*>amHOf}1IQe<(Y*S5T5i%b{ z!ML**-~aYF13!zR8*|?go`8SpQ*8HWoOY=4{$>K7km*nL#?*8uDYKzq`flPOlD)nue7qp2magcE z84Y51TH*{6aI@92#`@=i#{7;-(m8g zL?wyUVNc(Tcq-TW)AK#Id+BdqR~$fWU|rPf$W%row2^?YiRj{KK6j?8s2F^9f#H`N kuv*G1Mf+M--&$PJ&UTBdoTq7%!FCHE-2L1hoDIMJ55d8s`Tzg` literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html b/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html new file mode 100644 index 0000000000..a04530c018 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__void-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_void< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void.html b/docs/generated-html/structcutlass_1_1platform_1_1is__void.html new file mode 100644 index 0000000000..e71b03581a --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__void.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: cutlass::platform::is_void< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_void< T > Struct Template Reference
    +
    +
    + +

    std::is_void +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_void< T >:
    +
    +
    + + +cutlass::platform::is_same< void, remove_cv< T >::type > +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__void.png b/docs/generated-html/structcutlass_1_1platform_1_1is__void.png new file mode 100644 index 0000000000000000000000000000000000000000..20e46d78f8b2418c001eddefa551958b302b5950 GIT binary patch literal 1609 zcmcIkc~H|=5dKkML}?JE0~Jq(AdP4_1X6->BZOekNUd^3Dmai;&aZ)V?qyR$R<-nZX-S;x>m zkWHqW004j>eZ4RMpoP}pU-Yy!y8tzRMB~0b<{#*-R;x8|c2Qo|O1Z8E4zTMW4a#RB{X~DrDmb>s;jqIiV8EIe1vw*Q!c}J@O$=|R*E(%1q`X|itEpRoI^_Rvj zuio1fLm>|ZgDr0-X7-cduWU!FO?UMje#w_v z218Fm@UHWzdpa+tB4xbHglwAL@&oa71tL3H>4k3))xr1*Y>y=Tlu^8Gxrg8mx23wo z4NtiVg|JSRv+HC9;v`xc2hpCqn}_wJ6vkWwnfMgh_){q>O_m~~mMg)v;>TQ90+T@l z7aSLw9!2|RRax@$a2HZS+eBeTI}4`$qwgPpcM#3X?DncRZ=nQlK40q%sZH;c#X z_y1LODX%w(4;Baimz9ta39THFl(zHe=B1+TH8 zqup^)GuJ1H@!#CZe$ zbrfQ=aSA;;g}VDHksen?@(+Ypt|*Hi5Vq2zx3@T;*hOy=arxHe2Mm?hQm00G;8w_@;RRTWWbv%GI%hJE4ew_4 z((PsE5f-rP?DWqc_(!2J#VnFRoLB+~iNo4^fLKF>RtS}v82}h*^z^g?_g-gKle(7a z|9=VWgLDPnct4mfj5Bso#}J?vb!|LjPX3}#4+I=NEHCCH$eR~qOBkkcf+p|=uz^)> zC0KG-iu7j@sdv%s#qmzkY6C2rRB`F@>LMmKx3a&Jq8B`?inc0UOrKA{2nH9FRcSu> zf9gVEv?wf86SP4#@kp|NS0PA52f`9te_6R!Z9SJ;lQx$L1(>RpDSH9Mx zm5MKH47ggR-^eD}*XQnMK6~+FT2|LPMTm@V)fIX)v`3;SP_ChRlbUSfV+yB1pKMzB zWYrAV0Sgn~h$Av5l672z5^Afiph>l!{q^pm$(ew{Dd5(!OdGJ7<5Q%$m6$d_U3gbR ugR7gR5Nr0?%{4@fmL25eW#nP(W_P3SWtv|Yr@hwv`vB4#?N#Y + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_volatile< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html new file mode 100644 index 0000000000..a75658c052 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: cutlass::platform::is_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_volatile< T > Struct Template Reference
    +
    +
    + +

    std::is_volatile +

    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_volatile< T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile.png new file mode 100644 index 0000000000000000000000000000000000000000..7a744237966db444083df255e8cb8d00fe7058f8 GIT binary patch literal 989 zcmeAS@N?(olHy`uVBq!ia0y~yU{nCI12~w0WFJSwaUdlT;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fjP(1#WAFU@$KB#eUBA*-1y}$?fI{q ze>>*Dk;2WHyWVE3J^4~)qG3VO1O2VnW?hL|%^x~R$n%qj<0O#@O)8oSte!y*T$7eG z$T~hP^bB(0-g$rOhSuJxD$|+wS~(u(Ht(#wdF?InXWg34@#d8+|7i}df17paIZwZIW`F*!={si^$m_1$ zALdwG@cX&Vmec&1K8Bb7JPynL5z&6Qq01!ZTj_t+-P_~RjIS9!-oUhW!N!@9Hh%1C zOFX1rv_C!h=CC<;squod0c|e3^J*Q-o|GxXZi~3EtH8`VhvR3oWAFlEQl^#7 zzogTj+_a0w+ez5iwxwy-m4zL9qBJJ49pai4vdu>L$@K>_xumj-ZXCPA#bqvGd^^4R z!b8<-b9p|>J$WxtHZA{d^P7X0<|duixwdo5q&&sq8tbD=9>u<9m+;#-E#te0@#=;A z@$5hTh`jzLH~HZSO*PNodF4%HNa+|m3t`=iY`)J2}Aw|#d zLca+rf7O>Lcyb|!`2pX6wOiG;UVFv#A@mP}3WJ>{N5cI1^G#Sbcylq`c>YYCm+{TS zCI+V~;qadQ><#80pEk)=zyJA_ y@q + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::is_volatile< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..c2817b5f3c --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: cutlass::platform::is_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::is_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    #include <platform.h>

    +
    +Inheritance diagram for cutlass::platform::is_volatile< volatile T >:
    +
    +
    + + +cutlass::platform::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from cutlass::platform::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from cutlass::platform::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from cutlass::platform::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png b/docs/generated-html/structcutlass_1_1platform_1_1is__volatile_3_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..b86e2a5973cb2c0a298967eed9160a2c65210819 GIT binary patch literal 1004 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y>312~w0q@1)O2au8o@CkAK|NlRb`Qht}Wrs>9 z09jys;J|^1jTK=)E=Ng_UoZnu5eQs86=KA|z+CO=;uuoF_;&8iq9YmtER&TE{r`W` znq4UT<)>4xBi-AlCi7l2nQ-}lw&in~Cv!ftL{4Jzl;SX-^n?=$-z=CM*RS$6(0N{d z=5}{^ZO>`Ki)SrZcu*}<+P}AG@$BNqf9>xsx&Hp&v!DO;TEnM(+x%qR!SmaXyT6Qc zI(xZ85=n^mWp^W;s@Rkur(i)SY8_VawZd6#D5*ZDt#uPCgKUas;r#C%eT z*mTcH`wsT5RLPv5-TZge?0?3T}Mm_At6FtRWRKV&=b>8ofFgA9lfAY8=2^N=l}+kp82r#ZvC0xDBK4a_x{ ziY%FOiG@>vL4pIthMkX|305lYXtTL5*KT`&?f1)FKF`_gUu?c(&v5+DhS~L@Q}#a1 z-%(hrcjm=`v(cYsKRu-X==OQlt5SbzqU#qgb^dvK<(hT%(=%V1|9>U^`_8jvJz@W6 z>VN&Hc=k#Bn&NkdwjWo%XzT5$vsdqnB-d+?%Nu9s{kUZ?J;&JdeVQH1u~_r9n@?YP zcKhSbtLf`zGh5#;*!*rvra!OZ@;hIIb#`A%kNWmk8SD3^R?v`2O{&lSw3*qw9UR=SQ+t{;lFAiFm$BvGW^jz s#M_{LhoJ&W$>jxkX?kcbomkJn{nV=9a@Rq3VAf>tboFyt=akR{0A>u`YXATM literal 0 HcmV?d00001 diff --git a/docs/generated-html/structcutlass_1_1platform_1_1less-members.html b/docs/generated-html/structcutlass_1_1platform_1_1less-members.html new file mode 100644 index 0000000000..24798c6b08 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1less-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::less< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::less< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constcutlass::platform::less< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1less.html b/docs/generated-html/structcutlass_1_1platform_1_1less.html new file mode 100644 index 0000000000..abaff3e489 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1less.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::less< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::less< T > Struct Template Reference
    +
    +
    + +

    std::less +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool cutlass::platform::less< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html b/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html new file mode 100644 index 0000000000..c35b9e853f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1nullptr__t.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: cutlass::platform::nullptr_t Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::nullptr_t Struct Reference
    +
    +
    + +

    std::nullptr_t +

    + +

    #include <platform.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html b/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html new file mode 100644 index 0000000000..6055a46c00 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1plus-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::plus< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::plus< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constcutlass::platform::plus< T >inline
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1plus.html b/docs/generated-html/structcutlass_1_1platform_1_1plus.html new file mode 100644 index 0000000000..71f732c90f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1plus.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: cutlass::platform::plus< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::plus< T > Struct Template Reference
    +
    +
    + +

    platform::plus +

    + +

    #include <platform.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexproperator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr T cutlass::platform::plus< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html new file mode 100644 index 0000000000..a67005a93f --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_const< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_const< T >, including all inherited members.

    + + +
    type typedefcutlass::platform::remove_const< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html new file mode 100644 index 0000000000..d0af5788f1 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_const< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_const< T > Struct Template Reference
    +
    +
    + +

    std::remove_const (non-const specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T cutlass::platform::remove_const< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html new file mode 100644 index 0000000000..49041398ab --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_const< const T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_const< const T >, including all inherited members.

    + + +
    type typedefcutlass::platform::remove_const< const T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html new file mode 100644 index 0000000000..a8fff9b4df --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__const_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_const< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_const< const T > Struct Template Reference
    +
    +
    + +

    std::remove_const (const specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::platform::remove_const< const T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html new file mode 100644 index 0000000000..64c6607b3e --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_cv< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_cv< T >, including all inherited members.

    + + +
    type typedefcutlass::platform::remove_cv< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html new file mode 100644 index 0000000000..5972cb34b5 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__cv.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_cv< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_cv< T > Struct Template Reference
    +
    +
    + +

    std::remove_cv +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef remove_volatile< typename remove_const< T >::type >::type type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef remove_volatile<typename remove_const<T>::type>::type cutlass::platform::remove_cv< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html new file mode 100644 index 0000000000..19a47545ac --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_volatile< T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_volatile< T >, including all inherited members.

    + + +
    type typedefcutlass::platform::remove_volatile< T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html new file mode 100644 index 0000000000..eb259c2e6d --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_volatile< T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (non-volatile specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T cutlass::platform::remove_volatile< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 0000000000..09e68535b7 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::platform::remove_volatile< volatile T > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::platform::remove_volatile< volatile T >, including all inherited members.

    + + +
    type typedefcutlass::platform::remove_volatile< volatile T >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..d2a95b212b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1platform_1_1remove__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: cutlass::platform::remove_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::platform::remove_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (volatile specialization) +

    + +

    #include <platform.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T cutlass::platform::remove_volatile< volatile T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structcutlass_1_1sqrt__est-members.html b/docs/generated-html/structcutlass_1_1sqrt__est-members.html new file mode 100644 index 0000000000..56f161409b --- /dev/null +++ b/docs/generated-html/structcutlass_1_1sqrt__est-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::sqrt_est< N > Member List
    +
    +
    + +

    This is the complete list of members for cutlass::sqrt_est< N >, including all inherited members.

    + + +
    value enum valuecutlass::sqrt_est< N >
    + + + + diff --git a/docs/generated-html/structcutlass_1_1sqrt__est.html b/docs/generated-html/structcutlass_1_1sqrt__est.html new file mode 100644 index 0000000000..c973ff7234 --- /dev/null +++ b/docs/generated-html/structcutlass_1_1sqrt__est.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: cutlass::sqrt_est< N > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::sqrt_est< N > Struct Template Reference
    +
    +
    + +

    #include <cutlass_math.h>

    + + + + +

    +Public Types

    enum  { value = 1 << (log2_up<N>::value / 2) + }
     
    +

    Detailed Description

    +

    template<int N>
    +struct cutlass::sqrt_est< N >

    + +

    Statically estimate sqrt(N) to the nearest power-of-two

    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int N>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__chunk.html b/docs/generated-html/structnv__std_1_1aligned__chunk.html new file mode 100644 index 0000000000..0734188791 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__chunk.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: nv_std::aligned_chunk< Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::aligned_chunk< Align > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__storage-members.html b/docs/generated-html/structnv__std_1_1aligned__storage-members.html new file mode 100644 index 0000000000..21b81924b7 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__storage-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::aligned_storage< Len, Align > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::aligned_storage< Len, Align >, including all inherited members.

    + + +
    type typedefnv_std::aligned_storage< Len, Align >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1aligned__storage.html b/docs/generated-html/structnv__std_1_1aligned__storage.html new file mode 100644 index 0000000000..2d99523b1a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1aligned__storage.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::aligned_storage< Len, Align > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::aligned_storage< Len, Align > Struct Template Reference
    +
    +
    + +

    std::aligned_storage +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef aligned_chunk< Align > type[Len/sizeof(aligned_chunk< Align >)]
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<size_t Len, size_t Align>
    + + + + +
    typedef aligned_chunk<Align> nv_std::aligned_storage< Len, Align >::type[Len/sizeof(aligned_chunk< Align >)]
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of-members.html b/docs/generated-html/structnv__std_1_1alignment__of-members.html new file mode 100644 index 0000000000..ea6de866af --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< value_t >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of.html b/docs/generated-html/structnv__std_1_1alignment__of.html new file mode 100644 index 0000000000..de1689cbd1 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: nv_std::alignment_of< value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< value_t > Struct Template Reference
    +
    +
    + +

    std::alignment_of +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< value_t >:
    +
    +
    + + +nv_std::alignment_of< const value_t > +nv_std::alignment_of< const volatile value_t > +nv_std::alignment_of< volatile value_t > + +
    + + + + +

    +Classes

    struct  pad
     
    + + + +

    +Public Types

    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of.png b/docs/generated-html/structnv__std_1_1alignment__of.png new file mode 100644 index 0000000000000000000000000000000000000000..8ee1b829fe54939f227317cc995953a1831f2672 GIT binary patch literal 1636 zcmc&#X;4#V6b+aNNm*1785QW10FgyjLAF2;COno9kwrw-AX}s$D59ic5)BTH1${;$ zSRm8_#)yPPD6#})Ace?g!Jx9ZuqhxB3_=2=FZNew{NFe8y?5_<=f3ZJ_nw)TM| zK&k54K*BK?L{eNMy+JlG7-9wR#06z3EsoEpzw*)cd|ME*drAJ6K%nbFe^Mwms=Ut$ z19oKG2+1VN8Ooyl8dr`qQ`qh_^@jN5Z73HHa-}`H!|*%F)2}1XUK2$7+9{J4yBAwK z@xDu&s|aHN^24cQ^($DNyDXm(aJLkn;A~ps7ZcTduGswBL2^i}2KXvu(-j z$xK~Q{+@#l*{@EIt*6Jh=&b$?$I+@6{0BLwMMx(xw%bx8*Lp*`w3D+VWse zqY91|_3jD0I_eAeVY(%5K0d?b_)T~H%IaX?`!uL2uP0l1p;;V4_qDW@3GMyUD3J{B`wqK#zAVi zM-8mX{}1SC+0J@%;j+r%gTlHECDZt@!c!hy=E6EZ9vZkHyT$;~6|vwH)}IK*G!Vf! zk1ll2Wn)Z_)d>t>QyvQeTFf|I3nsdVXbk~KADe4BKA_*d+vp;37F~nO?-q(!V4T9~ zFDlOW|Jg)YX_TR+Q_T*KcVBtED3&~eZ$9RfJ-?17Q-8t%b&&m|!emXmnCS2|08y2* zDS;dH=wud452yomGdJOS1i;Pq4|$rD)H<>zzyJl`y!ho>?Db$W+X<&1@-iq?#a{O_#0L6iP1@WAg$?f`t!PI}L@g(|-Gba(UQl>;k z$|mWrww7Rr&NR2Z>3(2xI>USJbmj|&g2j@1ASmHoP^bL8XKQ)60$zX z*Esmd6Ema3HJTUrt&tL{bnP~)bCQ6y954Q@1S#dBLYl0{dOikkUa!GxeaWt*dMP@C z#sSNiJ6wBU3$+hr1lTV#QQ%bJ((apRvr^G;DZFiJMIHcBx;Kn&pv^KDZlhaX^5rIs zWeN(6vo#odHUW37=R}xn25Cxdy>6hL*SbR&>rK7EaB7{3>myE?4-s@RbPK}(%MC$( zpkE>#RJ#iqRm;>EFt)=1xkDZRh=g|2DrTcQ6U!mTUWI`FSeOO35Y{nQ5-`kDVTFO# d!a8#Uu|Zz&18*99AP@Q_zyL3T=R=QZ`aft$0Eqwq literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html new file mode 100644 index 0000000000..b64284eff9 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< value_t >::pad Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< value_t >::pad, including all inherited members.

    + + + +
    bytenv_std::alignment_of< value_t >::pad
    valnv_std::alignment_of< value_t >::pad
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html new file mode 100644 index 0000000000..3670e505cb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_1_1pad.html @@ -0,0 +1,136 @@ + + + + + + + +Cutlass: nv_std::alignment_of< value_t >::pad Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< value_t >::pad Struct Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + + + +

    +Public Attributes

    value_t val
     
    char byte
     
    +

    Member Data Documentation

    + +

    ◆ byte

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    char nv_std::alignment_of< value_t >::pad::byte
    +
    + +
    +
    + +

    ◆ val

    + +
    +
    +
    +template<typename value_t >
    + + + + +
    value_t nv_std::alignment_of< value_t >::pad::val
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html new file mode 100644 index 0000000000..89d2ce1066 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< const value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< const value_t >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html new file mode 100644 index 0000000000..50c19a3a8b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< const value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< const value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< const value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..3412a3c455e3545926f1064025743fc8f5785265 GIT binary patch literal 861 zcmeAS@N?(olHy`uVBq!ia0vp^kAOIUgBeH~pNX6eq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~z43H$45?szJNIqh90ML#f4xil{wtT4 zDI{1w%$8Wy=Jse?&&IiPZhZWgD$(}vX3~Tu>Z_H#f?mJsm~>^Aos-Wy`|Lh7&s8hG zctu+jDOyqK=S-vs(mwYDQd)=cSseI?Rp0z7xlg{S-{Oov& z+2@=Mr@bysGZQ$zN6ygmbaK7MNt&jBZSt}r@o>fSN^BF7X3q}Q z`Bk*H>++_nZfkpIdxw5;7tM>2y*T}K#bU9wQk(9Sh(De>JMEc~%cmL9ZC%p(=Z^c% zwdhs7nZE0rWwG4Dyso%8D<*we>G!8~%983*fseJP`R7JjW~;8e(%CcVNvYP4Hm$ln zg`T?{zniR5`D+Xg5UWKhTi1y{GT=G9nDs;XSLP2nw(=jJKmXWs-n~Z9Zth2Whd08a z3u;SU8m?UxWQ+=TXUT|_6uF?6aVm3BLholHq(Pr zCFKK|xli`T9kn*+D~OEG3~8tjhlTA|Cm+qF?yvdNQuO;&V@xc7d4a*x)z4*}Q$iB} DZbg|7 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html new file mode 100644 index 0000000000..6b9668d7f9 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< const volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< const volatile value_t >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html new file mode 100644 index 0000000000..167c865f28 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< const volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< const volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< const volatile value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01const_01volatile_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..39ebbbbc847dc880108c1dd754f21b9d7c3b35ab GIT binary patch literal 988 zcmeAS@N?(olHy`uVBq!ia0y~yVB`m~12~w0q(L0REFdKj;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fjQgL#WAFU@$KB#eV?^>T*a*~?fI`f zf1Bn>v*{*#=dAkmB%@kQmF>)-8T)lkZ(5Se->tIr?))c;C#Tq`%$(G&68vdG$}g3E zp?{Udw^clqU;jN7c>U(~&6hoclJ7rM^h{qNe3)O%l9qFJ%l+KYo?mM*t-oTA!;?_C>|@6(fA6@q#Qw>~pJytL zsNP-vbz<%e$G;VmPPA8;tbTW`WQP3Rk29Vhuf6rKQu6BM_fOsLv@G-LsXXFhQ6INU z_;SxD?yrk=g}+Oute>>?#rsg>E9V~wpUSE>_bj^W9V)vsF5{u!&u4#?JlEfPo#=Hb z{Ee;LcB8U9^JBYpJrB;42%NNN@715Te|}6`FzE~H?^l%*m&k8N4XRxqBBrH!uI4|W ze?#6O>J!6@x9{~ERrnhg#ulhdc*1C~aS7v$N$d$;at)f549lKSfm7G!uDKK#Z>$Py z)7(EXl&SI`m}0{a{E5LQz2c5-)bZ2rH|tdI_f}-LNK8Jyx0LsB!H%299e-EbFr15c z{>$(FyyJ@*;~y4Q?%Wr;Id@Oosq5n03wPc7cXwaT-PduU^K)6=zvi_|d6ByL`h)8~ z58eKHUhJe<)^i!@Lvs$Sn|pj-d2>umXGEUZjnw#jZ^QX!vvqCkO$}ceTs@L1(AHY_ z;Lg%8i#^liFZ0C~3!iW-DE!IWFL10v(%t)w*_!gcuc@b$b9`p^CEm^5mD~Ak!trOu zj|C@{?$Nw`aznsj&)&{0|M*@PO+U9h{-v{=^0&LU(tlo*?N|T2_~q?SGkkuYsEnDi z{HNeT)6k?d%4&yxel}V!uUo+VKjyE5W%P{auid+ME_)lMnmuvbD!Z7bSmwe*6&z~c zyXQ~e!2kBI(5vn82k-uzr}Z_yxSDC*w+aTETzLtm2jV$r9*32&ePNK2erM*eMcs|% zNBvuc8Q(uYP7mY!u&wsV9)>^4(;z{7lGy+l-X#wMy)-kw{^YORYbkLgvmKOO89ZJ6 KT-G@yGywqTQ{OoN literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html new file mode 100644 index 0000000000..8f7bf3dbfd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< double2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< double2 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< double2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html new file mode 100644 index 0000000000..2fb8e549e3 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< double2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< double2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html new file mode 100644 index 0000000000..e92dc0f6b5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< double4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< double4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< double4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html new file mode 100644 index 0000000000..ef1634f4dd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01double4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< double4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< double4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html new file mode 100644 index 0000000000..faaf84c514 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< float4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< float4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< float4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html new file mode 100644 index 0000000000..8f4dda71e0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01float4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< float4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< float4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html new file mode 100644 index 0000000000..84fb87dde6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< int4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< int4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< int4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html new file mode 100644 index 0000000000..f4319fff6d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01int4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< int4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< int4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html new file mode 100644 index 0000000000..65343cb743 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< long4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< long4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< long4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html new file mode 100644 index 0000000000..b46cf67685 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01long4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< long4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< long4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html new file mode 100644 index 0000000000..dadea6bca5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< longlong2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< longlong2 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< longlong2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html new file mode 100644 index 0000000000..0ab4ebe8f6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< longlong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< longlong2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html new file mode 100644 index 0000000000..40159270a4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< longlong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< longlong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< longlong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html new file mode 100644 index 0000000000..02995dc676 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01longlong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< longlong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< longlong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html new file mode 100644 index 0000000000..7f1d7cb09c --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< uint4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< uint4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< uint4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html new file mode 100644 index 0000000000..f6e6b5da2e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01uint4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< uint4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< uint4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html new file mode 100644 index 0000000000..1108074f04 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html new file mode 100644 index 0000000000..c1344383de --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html new file mode 100644 index 0000000000..69d800e712 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulonglong2 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulonglong2 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulonglong2 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html new file mode 100644 index 0000000000..fd5825468b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong2_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulonglong2 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulonglong2 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html new file mode 100644 index 0000000000..71454d9c5d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< ulonglong4 > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< ulonglong4 >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< ulonglong4 >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html new file mode 100644 index 0000000000..b6572940fc --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01ulonglong4_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::alignment_of< ulonglong4 > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< ulonglong4 > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    enum  { value = 16 + }
     
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    value 
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html new file mode 100644 index 0000000000..c0747d0b56 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::alignment_of< volatile value_t > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::alignment_of< volatile value_t >, including all inherited members.

    + + +
    value enum valuenv_std::alignment_of< value_t >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html new file mode 100644 index 0000000000..9d36cbdb7a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: nv_std::alignment_of< volatile value_t > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::alignment_of< volatile value_t > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::alignment_of< volatile value_t >:
    +
    +
    + + +nv_std::alignment_of< value_t > + +
    + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::alignment_of< value_t >
    enum  { value = sizeof(pad) - sizeof(value_t) + }
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png b/docs/generated-html/structnv__std_1_1alignment__of_3_01volatile_01value__t_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..3c0bfdebad231c906a1484e1909ba7f833512616 GIT binary patch literal 882 zcmeAS@N?(olHy`uVBq!ia0vp^Z-6*}gBeKbvSw-kDTx4|5ZC|z{{xvHzP?y?sB{95 z1;z&s9EjLh5eDRPlmz(&GXNEVz|~VBMhpzhjGiuzAr*{o=f0h^M1jYZ|N5%R|L*tl zM12{i{wpXGU-8M$TEK~q^P~P{6W_fZo|ArEXqdW0%rrrDrR!dP={xuscXVczeYnx-aE?S>8kInihQnuN$>FdXJx1-J<^L==3-s63&vlUVq=ck#i zzV-Oby4zx>cU)PpZ%Sl+&B?PmhkuD&n_d`pqI3G~_c>Lj*Xwp~pL;{;yV(<|Nt+96 zwcb9N^k(z6_7|4h>aREMzt*ezGIsBWqf?i}U(nuA9KXW)c+otSlP_8OCq4Q4>(Oqb z-)2oJcb7_^j+pdMmp8&QE08&CQq&dZvRTVk3N!v;{mbxW(Y%EEI;%x*Ilmpe#h)9# zVX-vxmX(a$7Z$PUICM*GV9?Dvz_eurA%Q!q1)N^EvNYJ4Brrx@X<*F=G~KY;HS5QZ z{ofW8?RHOR(a4@3{mtU;vy4p!|0H!BdW|lB-pcpw)Gyf^$4`IOJ*D#CXKmHK83K!5 z{xLqEb2?dR(b;DQ*KK8vPU$xOzO7E``^-%no^E4lqR1KM8|tlJ%YO-6lU>>F zt5+DM5#1ScD9_A&!@e`qa__z^s9y6g=E60V%_j}@61~}HH_E1hqb6+w`@eNPr(@q4 zZ2spmX?A4WSIss0kJn7lexGyl#*u*f^vlLEzu(>~OS*2HyGdlu+tqc4b7c(9M`f&& z-ktaSwSv@o&p!K;?JOJo&z6fl2#GIXEZBW&!{<f*_*Xyg@+r31ePLG-HQcT~M{yA_gD`ly`njxgN@xNARK2NN literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1bool__constant-members.html b/docs/generated-html/structnv__std_1_1bool__constant-members.html new file mode 100644 index 0000000000..bc6447d4e1 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1bool__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::bool_constant< V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1bool__constant.html b/docs/generated-html/structnv__std_1_1bool__constant.html new file mode 100644 index 0000000000..f25c9d8bc5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1bool__constant.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::bool_constant< V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::bool_constant< V > Struct Template Reference
    +
    +
    + +

    std::bool_constant +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::bool_constant< V >:
    +
    +
    + + +nv_std::integral_constant< bool, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool, V >
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool, V >
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1bool__constant.png b/docs/generated-html/structnv__std_1_1bool__constant.png new file mode 100644 index 0000000000000000000000000000000000000000..1596dfecef39974e34ca795d318a2ca710e343b5 GIT binary patch literal 784 zcmeAS@N?(olHy`uVBq!ia0vp^H-R{SgBeIJzON7hq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~&G&S145?szJNM$GB?>&Q{MT3g{O^A+ zPq1}D*qe8Er6QK?S?uKTe1VJ}(l3%pwtp0b$S!;iqk?_ZJ zWsYe^pOw-lIjz+;vVXn3_}SA#s+)V4--ud%HKJs}=GrvbYS&x)&i{$pqjY+gQ%=>r zb>@2uUnd-$zQ*cyVDIeq+S2U&Gkd**mQMTZGb!pyNrUGl^OB#*b~kT{ow+`F$@L=t zslW47R(f9d+dpmdeisGL_b+E()$rW!KG|o|uS+VEm+bqZ5Dg5qK!*Q5_6*Kh5u4`E z54_l>THV0gWu|xYO)taV%t?%2mU^;Xn64)25I-x0p_ZV)eC-y2HW9`I2VbLxeI_Rv zeqC};xqazw^XpY7>r+$M9=%>0@IQT1`_aoPnf9#LCl}P7I1zT)Vq)IWMM*Y$-_GlM zcy^P^?&-~Jo1XJWUT5q(U1_vkX;%q%g5G20eHJNV(hGvT&(#>E?{HgnB4%}s*8H=V zW2f^R=$v)3Csk`p|H(~J{iZY1d_S07@ZMH_(ucR!A#L@=^E1CEGRCc*yXH&Ix0%~j z*MFN?`dD=3%H#Vto%hM^GTrk1SjpX2y_KeUe+}A#Yt}_CKfe8(aYOv-*(v8^ + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::conditional< B, T, F > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::conditional< B, T, F >, including all inherited members.

    + + +
    type typedefnv_std::conditional< B, T, F >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional.html b/docs/generated-html/structnv__std_1_1conditional.html new file mode 100644 index 0000000000..9d75181772 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::conditional< B, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::conditional< B, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (true specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool B, class T, class F >
    + + + + +
    typedef T nv_std::conditional< B, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html new file mode 100644 index 0000000000..3ed9624a25 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::conditional< false, T, F > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::conditional< false, T, F >, including all inherited members.

    + + +
    type typedefnv_std::conditional< false, T, F >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html new file mode 100644 index 0000000000..7fbb13266f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1conditional_3_01false_00_01T_00_01F_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::conditional< false, T, F > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::conditional< false, T, F > Struct Template Reference
    +
    +
    + +

    std::conditional (false specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef F type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<class T , class F >
    + + + + +
    typedef F nv_std::conditional< false, T, F >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete-members.html b/docs/generated-html/structnv__std_1_1default__delete-members.html new file mode 100644 index 0000000000..7e8923463d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::default_delete< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::default_delete< T >, including all inherited members.

    + + +
    operator()(T *ptr) constnv_std::default_delete< T >inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete.html b/docs/generated-html/structnv__std_1_1default__delete.html new file mode 100644 index 0000000000..084e2b7ffc --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: nv_std::default_delete< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::default_delete< T > Struct Template Reference
    +
    +
    + +

    Default deleter. +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void nv_std::default_delete< T >::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html new file mode 100644 index 0000000000..7c038dde1f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::default_delete< T[]> Member List
    +
    +
    + +

    This is the complete list of members for nv_std::default_delete< T[]>, including all inherited members.

    + + +
    operator()(T *ptr) constnv_std::default_delete< T[]>inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html new file mode 100644 index 0000000000..c51c3518b9 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1default__delete_3_01T[]_4.html @@ -0,0 +1,133 @@ + + + + + + + +Cutlass: nv_std::default_delete< T[]> Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::default_delete< T[]> Struct Template Reference
    +
    +
    + +

    Partial specialization for deleting array types. +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    void operator() (T *ptr) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + +
    void nv_std::default_delete< T[]>::operator() (T * ptr) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if-members.html b/docs/generated-html/structnv__std_1_1enable__if-members.html new file mode 100644 index 0000000000..e5464c884e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::enable_if< C, T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::enable_if< C, T >, including all inherited members.

    + + +
    type typedefnv_std::enable_if< C, T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if.html b/docs/generated-html/structnv__std_1_1enable__if.html new file mode 100644 index 0000000000..ac720dd04b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::enable_if< C, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::enable_if< C, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (true specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<bool C, typename T = void>
    + + + + +
    typedef T nv_std::enable_if< C, T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html b/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html new file mode 100644 index 0000000000..75fdd5100a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1enable__if_3_01false_00_01T_01_4.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: nv_std::enable_if< false, T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::enable_if< false, T > Struct Template Reference
    +
    +
    + +

    std::enable_if (false specialization) +

    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1greater-members.html b/docs/generated-html/structnv__std_1_1greater-members.html new file mode 100644 index 0000000000..b6fdba6d7d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1greater-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::greater< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::greater< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) constnv_std::greater< T >inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1greater.html b/docs/generated-html/structnv__std_1_1greater.html new file mode 100644 index 0000000000..c684839d4b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1greater.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::greater< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::greater< T > Struct Template Reference
    +
    +
    + +

    std::greater +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::greater< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant-members.html b/docs/generated-html/structnv__std_1_1integral__constant-members.html new file mode 100644 index 0000000000..2b5bc40662 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1integral__constant-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::integral_constant< value_t, V > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant.html b/docs/generated-html/structnv__std_1_1integral__constant.html new file mode 100644 index 0000000000..777f3f1977 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1integral__constant.html @@ -0,0 +1,261 @@ + + + + + + + +Cutlass: nv_std::integral_constant< value_t, V > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::integral_constant< value_t, V > Struct Template Reference
    +
    +
    + +

    std::integral_constant +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::integral_constant< value_t, V >:
    +
    +
    + + +nv_std::is_pointer_helper< remove_cv< T >::type > +nv_std::is_same< void, remove_cv< T >::type > +nv_std::is_integral< T > +nv_std::is_integral< char > +nv_std::is_integral< int > +nv_std::is_integral< long > +nv_std::is_integral< long long > +nv_std::is_integral< short > +nv_std::is_integral< signed char > +nv_std::is_integral< unsigned char > +nv_std::is_integral< unsigned int > +nv_std::is_integral< unsigned long > +nv_std::is_integral< unsigned long long > +nv_std::is_integral< unsigned short > +nv_std::is_pointer_helper< T > +nv_std::is_pointer_helper< T * > +nv_std::is_same< A, B > +nv_std::is_same< A, A > +nv_std::is_volatile< T > +nv_std::is_volatile< volatile T > + +
    + + + + + + +

    +Public Types

    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    + + + +

    +Static Public Attributes

    static const value_t value = V
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef integral_constant<value_t, V> nv_std::integral_constant< value_t, V >::type
    +
    + +
    +
    + +

    ◆ value_type

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + +
    typedef value_t nv_std::integral_constant< value_t, V >::value_type
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator value_type()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::integral_constant< value_t, V >::operator value_type () const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + + + + +
    CUTLASS_HOST_DEVICE const value_type nv_std::integral_constant< value_t, V >::operator() () const
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename value_t, value_t V>
    + + + + + +
    + + + + +
    const value_t nv_std::integral_constant< value_t, V >::value = V
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1integral__constant.png b/docs/generated-html/structnv__std_1_1integral__constant.png new file mode 100644 index 0000000000000000000000000000000000000000..ddd792d735c7726ca71925ad45997016a0ec89c9 GIT binary patch literal 12811 zcmd^mdsI_r`n8U&&Uh))YU>3hze<2w6ciPzTvA(w1_j#Mf)qonL{#LWAb|i$Y^k>@ z(u6|P0MS~Z4niboA_NFnEr|%I5kovAk#ZAqg9LIV$@iWhS{-d?eQRd^=$y4!9^fS5 zyl20AKYKrqM?Ve?nl*jF^huK@%|d^){?kd5{(_q{>6P@qP60n*%vk#w_-Ee7n?BoM zv)RDu(CDL*YpLIXGy7LVL&KKJ^lZ9nOw1alW2x`MGh0C93g&wd>(YYx%Z@Db~HTsI1O`jAqXy(xbej zsk0=>MC(|_BZh>(t>GI!De{$yAm?nd! ze={B0`pecswQXBuU#2!on6^Gk0W_NqsY@E#R(Z|a`yNX>r8D5{Y*F|1(Jj2lqDPmQRqzP#tH01^1$;e3VgFb8F6{}6j)pRgfhIenwN%!iox ztqqMs-MmO?Jb(zqOV#YKM{<5N4^P2_5nmU_Wk$=oh$}G(-SjBZ#Z0>84N;6a%&;yt zXXex~50=CjuRzsA&)5cRtW zT8Aw;1Y70C-BqT>w$)zqhUVJ#+LS9Z8cKb<<}DvNY_8f=OKMyPDfbw>ho+P{q+c%u z>odf$?%f>oHSY|#ug4GOecRr>n{46B&ye4HuY~MA?>1`TLLu+XnU5Yn*TT@!%|)T5 z!pR>G*&1ienRDwl|ID@m8ZCV4t9h%u-Z|MD7E?ZRdfzVG$ICyNo&ELh1@q@@5`s%k z2Tpxe2wzY_S=YEKXXf;h-QbcC=YQ)Z+7ZgKX4XB{(&ouS-xQ-*$O}kgI9TAErRWp8 zam&w8Tm&n?Gg-w#t|(ivD{aYbKZdN{SC>KV)G0*zU~1EvA|4gj|0wAW zKt-GLMyhl1GFBU^g3`F2#t$W1gLq@r5oC=hCCR3>RYnT#&$zZ@knm96u07gZt35xe z;GR>@vYgzz$l^`%H*27a{qanfYA42ivQZ?ldZF(Fa8FiKiV!m0okFU7UogI@=6dc0 z^I5YiZ(f~KM$x`%()OE3`Y3+XQ$ErZYk@%{cL>4sxGHXt#J;Ajdfo!O}BIs+hT+EH9^}oRmOV4XDN>aI8RR#^y-+Jxxg1X}RsPz>ap^DCC z&%MTa2-59Qxz={CM-mEJ71>3tVVe%F=pVvE<2)D)N+?@3-c1^xT7k>>qq7%B0K27LSOuw=rxn zQWT?4yi%B$Si|iux>fAWqJd*4emRS~I_qMR`I9v}Qab&rVknY|Z{<($O_z0-U9X`w zei!Bp1<+e=?uR5ruO&UWO1Q?Q1aX>`J#8epZ&_!NPCEAn{jM5$pTCV?$@hSIs}CwS z^SGF3c6n}sC)!(DWj;NZS>vivYX-LK)yD zn-Fl^zKxGLZ_9%D$9Lh-$qkHOz!HHoO`T}stegX%Z7JAsVMk=al!t%wC*nRHQpf%K zN%`g2{T=oXLx@>RHv%$WI4SIr5$w=lY7zkFb1;UtZp`?dC2s=vO!j37`M5f-B9Wzl$p!T?I_Q6U$v#LdKO|&BF)>8-R6&7Qi}+xsFK_ImWeu@th}5JOyPmvEmE}A3!N|BzUtm4fUEAa#jua(}nmZjg!rQ+2kINfDRU2ScxUlnhjIFb0E 
zJc&_`Lx*x|sHraIEqK+NI`(=AUf+v1)=#f+hVEU?SiJ-=(1+G&9PQ)Q8yAn6g0%D>57D{D(c=`)Otl`zG2302ZXw)R2mNT2fc3OTER4n`?8$?dFCj#N~|w#pUHFSwNJ4T9`yq>QBlG1U2DYgl}MpMBiSibEG9JwH0#6%jeMV#}F!cxaX@HKt;3Ugpyj~dif z4+WfNBw@iHtIpFcJpc+na>T#4`ui84^5RPfCjL`(Z$m^+ss%9dqow&!Q?Q{{S;d+t*HjmzdWMx-Td(z;w%EFZvMk90+9)|5Mh91hzT17LT zvyt|rk$>HKeb#`o<|Z2fsDsq#Oa;6DH)qP`b_Mf zMt{_AHXlf2)Gf~0Z0aZ91W7 zyQ_t@$`E(2>D>aPtsMNp#bg(A{T-yy5iEznjSb-Pm;W`mL_G%=?1>QSVVIM7-!4U~ z_TYwQ+rTBs8AOFz+GZsX*OH+MTv9eu-B#uKM3ib~i_HgHzDE+Sf<<+56i2!57)^=I0rCtjvDw{!X>o^fi4T7tOMt8A=uPT(zVYl@51(<)-O_rVVa;bu0v>q>& zv2#tKI1cchE7m5Rmh+`y3D=CStZ6%+s?^_~RjI~STVF66v*{Uv={%;$7#=4W6bzOo zo`2GS1X(2~^VyHbU;0*znBHO0iSml-Os$FcZx>gGp?DY#qE58K} zZOZ2HTD3UCjv4X1lT_qkcwZ@?9aKM1yuw|Br3nw#b6>U_ZZhz-?p%m`z^U zf6#Hhm{4A9ZT*NiP^Mp5+MGPe?`(hWQZy*#+FyDX6J~YOF~wzLWyU@&iR>a6?ZAu) zI*^2)z#>?`7?&xmf$St+QYvyYt1JqJs8q{Q2J5^HCaa&>OQXEI;~J#bUht*VVLLV% zb=#?9p8|#99e-hI({--3{b0+^g;x#c2k=H!2IEmIU*Iwmj$N&8t3OR_NGr+89G-j% z7P0U6t!7BKTY6N1=hjyE^oOJ@5}PlCIP#7vM5p`{aGv03ix0$cM|X4TiEOIN8p&Yr zRIC{hDPYTQm^u<@*!AqwmyX4!(lZw=sx?5@!AG9@yV&J>?d<5edNH0L_4HgRf_ht~ z6RMkCuggz*db$cc;g9BmdZlsd4~FDm=WN^kDbD7qBhMfR8#@0gmYM~7{QfWO1&?=q zq1Rp>(zqVnd>9AgD#!ynZ(@m`%L69({;4MX85|!lM!&())y&~mfoBpG375}?q{W~* zUNC>`mhDqmsTiu?Q>wQbnn#<5w7H_WDeuB%B!yCUo^?zCEZ1h1Z}>UyQs!)_=T^5Y zHL$6$KXE0V7Za;rmBQF13s%d?H7yOI4kjsI1jJ6g!vClqhotUj5#kC8y@w`jal>M+ua-=QPB?vZs8_Y&2+tJk5fhOJhR?$|Y zd8}U`r}#L{ilX>maM_p z$zz6E?U`nOj@HxJlk;F^`ulJJeLPU7D@=P)0UZX&uXDS<&3TY68_FQ~u>l1hieQ;3 zk~MWp;!}uG_cHF9FSMzN`X&;$o@gwyrvexp7xRd?5emWsA1-2OOR7V=Ecd(*N$ODd z%UZ^KDMiy1YZ1*)@a2#b&k0c8KhqY@LY79Ez12)&+WmhhFIX0c^JiLFAP#%3u!gfc z=9N*tr}wYJFo6+put^Kc!ra&i^%8j8TU(Q8KJGnhujw;+>O*Bl8igLs?9S`)0oTqu zz7*}Gm^R<Ujrs&+jby_vLxX1LGqvOfy#lcTT15dShu3! 
zfY|87^lEM2h$)dK8%XYMIUxHC?gHdGXJ)cqSXildh6$`4h0!vlsy1OnZp!O90v-;j z{`nvvK~3EJuujO$#U@BOo=skWD7P(VN$R!rN0rW8i3iiYl_1=T8q;s4H}ROmBn-rr z998&ji?BGX%zVI)21l_tW-%Z;?x?cLPIfeRv738W(UrK&INo-=OEz!EDwgsCHk`N4 zrIfoW7Dq|?ytc2@t{pD9GFt1&?)}P+$B$q=F=F z)Y<DRnPoDJn}b&Lm6b4f#H zX&QpZ(2g_X=eT}Lf_X%>9%jafLcokC6qb7>2Z=d)`2(r4iht-S5S?MweGZ7KM2z6! ze9+!fn(UeXKVh(?HUalDoH%G2_#S5q8N)T%ZR}?4YiXydr5;J){Ptx&ptc(rl_a8; zp-C`uD&ey3kfVXF9E=mpU7#J|Bx(QC87YJzH(`t4i_DX|Gy`3 zh5tLx1zkke8!FZP{F%kS?8QMFz9cHx&7O;+LOM3EYg%?(kL^qSH6(~mU|`1xa0Te+ zYPY0URS#iH`ZH$Sk|w7%gDzDHe50{-e>3Q$=wuLfOh%osv!KA1wcRPJ3LxTFTs$$&m%(-+6+;oIrLI122bmx<^ul<(?6&u<&aV3Fr$| zt#uOXZ06BQ$b)wze)X4g#e?3o*XgH3M}02D_sxp-vu9}86wC)}P#03C!7gNx>ahzc zi%MiS(}i*_#z!8~Fbx>o$v|DRKnCg8;eb#jxsuiZqmhL=u4vmn^jjtbn8UWtGx&pz zWH3M0Vid#Bdz>MI>v0>7yNOEv_~ktK*t%SL+B-Dy&ped1|F>H8e-NA$2e6(pwKk{# znU9qHv150dE8eY*8U3}-0{TAk;ockS!|_bXe8NL3Iynrig%~Wf(hkSF zzX!+HpMj&P7{sP_e0UMl4LWTkPU|bzGcjCxJMVG@t|EKxPOmHS9EKER+$X>weM) z(0vD|xuj)A9GvEk_6F+&!B@FZor5BHs*TdfXV19yOy1Ibq~(_UI{m)4w*I)H$*$2y z{y?MI&AIZJeXG3;2N3`|4&Gz8oA#ioFj&W2St$9>@(5~HSJ@|Wi#z%?M^U8#H~yzg5z%)48sC$h*E0iN&b6R3xw>2PeuTp)=uEEF*gKQUXczjwVn(#Nd1zZ{ z_p8(=>U}O#8nvJio|Kz8SG=U!sT7yiSyY3lNzxp3bt43(L_AQrkHNe`6Cf%HErFX6Sxxgb8CI~e=gMJjdwmy&KB?B`q3_V zByuptG7N_+uSg3>b7PG@8Xb{*kYU?IJq{DK2&};wY~awNL)4y)@ZapsLLIwlhZON% zWpF^ZAP`S8w@Z zzi6xXHA!zne@BQw+4DMxZI%1fi+x>0-QO!ta5w8e)Q6B(Wccs*?^Xq~TS3nA4?ud@ z@(BWKxtGagAG=d}j`^X_t3;D2!k*)q`FO{>FGN#DK+=HElU!KQMNvlls5If@U7%Ff zf;vXpBGW*y?kD@4#kBQoSz*OFUgRS^AZ$GKNK*Lhkd>2oI za&Io7$1qxy*D*No;397wiboTHwO_>ghvPth8im!e?mnX zrRAd+qk>-TI!eH!HCtfa0iA4PW?yVRE*19AYk&&fwyRLbPe`$uo&&bWFp~+o1sw3} zm!$p^fcR53BgaO^0|#_+cgAR2nU;c#D%D0%H^B)E*$LFuiL=FXg?)2D4STB)jLC-6 zn$KbVtzb&ly%j;vkFT3fyv3Dv5v4m^xSU<9z0$+;7IPC5)4Xd_V!`ZPyffF@QTw2C z|C`WYd#!!M>dHj2s$a|<(6(wg)i*A~>A}ebVmZ5-Y)V@U=~)kUdqUly21-(=xhSPA zf+ul4c_j;)l~h+xd`n!X)Rg=PdPNue0@*|Zz4sBD8?qc5gkO@vdowJ=j*WTajFAxQ ztMD?!tjlz?uINv85e%M;07JyGQ?r3a*x(sGEXbv9+D9DVaXjlvfE-vzemjZwlJx0M 
z>AGAe$ih)nMvgAZMH=slQs9YF!g0VfR*=hUsza7fD<%5&e{~RSAuR-u@z(KX;a2}U z02#;lc4S=2dW?+EYcPjN@0C&-$tn>AZ0sB%aK{!Er*Ns30C>E?X^tHuX8=Y52DF8vx3$tV%quc>Bj!|4 zU#HB2On*|{_N~XKVT@a30WGrAquwgzbRkkaBB&QnAX*6TN9N2V^~3R-?OXVdSs-S7 z-UHVkd*guwodQ3q0dt!#m`8sJu}riRtqwe=M?X}e#d+X=ZcIXN2wh+C!M3meAO6YU AqW}N^ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic-members.html b/docs/generated-html/structnv__std_1_1is__arithmetic-members.html new file mode 100644 index 0000000000..06f493c8a9 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__arithmetic-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_arithmetic< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic.html b/docs/generated-html/structnv__std_1_1is__arithmetic.html new file mode 100644 index 0000000000..7d591ca7c0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__arithmetic.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_arithmetic< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_arithmetic< T > Struct Template Reference
    +
    +
    + +

    std::is_arithmetic +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_arithmetic< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_integral< T >::value||is_floating_point< T >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__arithmetic.png b/docs/generated-html/structnv__std_1_1is__arithmetic.png new file mode 100644 index 0000000000000000000000000000000000000000..34a184c1ed528e7a7fff351a1903dbbb8be49cec GIT binary patch literal 1222 zcmeAS@N?(olHy`uVBq!ia0y~yVEhPV2XHV0NxR#7HUKG!0G|-o|Ns93nIFEsSazs% z0+0p92M!#F*jNz;En0(qhZVXRo-6WF0)bzJ-&E#@3KiH2d;6fV|)2O-mos{ z)%V```o9;mRaZV;a_sriZ2bu#vh%q&W&hczGu>mg+WG9qhgMC0YjN(?xr*dGgF64| zKIX-KC_HnOffe5gZ$xcjIb4^G4ZHz3ajmhd0^YwybxpvDy|jIr2%^viiyU zFLWkuUTJw-D!E<@=*oLxPb=)hM&CH3;kmm; z-E8u$NnhCZUYWY2|M9N(((>Q`q;+~;5{qSgU#{%A{6ly7`_OznK2NJo)4IAH!7kpv z>ksZw*?V&O8Lvxb>hV9f-q3r#qw~dX;pf%=Q$nXNahdcl_0kR%SEXL>AQd=6?pS-G z#H2)tHVKB7C+rH098OZ~97oRkYq57YgILZLTI?J^R)^~^whk8{O;a56c=I~*X`WTpE;l2r2UFpmbhzciHYs)bAD3nKjI~)Ry<~nb7w5SqJ5xb z5#9OvO#bI;_2XyHo+)hDfBMyxN`vZRe;E;;^i@^w9y8QW zU-xn6D*t_Jjl!>HufO~v_Vdq$*X0*j<*odA?--*{gZG`gKa{!W%QwiL+sgFgZ%QAt oi&8Hpew4UpO3 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__base__of-members.html b/docs/generated-html/structnv__std_1_1is__base__of-members.html new file mode 100644 index 0000000000..599a2929eb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of.html b/docs/generated-html/structnv__std_1_1is__base__of.html new file mode 100644 index 0000000000..9f39064c2a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_base_of< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of< BaseT, DerivedT > Struct Template Reference
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of.png b/docs/generated-html/structnv__std_1_1is__base__of.png new file mode 100644 index 0000000000000000000000000000000000000000..269edcce135f9c5fd35491e1d77aa2e38fb369cb GIT binary patch literal 2076 zcmc(geN<9u9>*zjeAz~KTyr!h*^IOoQx9fZsn<3ghf&c<%XCz9w4shBmWh%ItFw+3 z7K!FdnmE~5E`oz;ge#zBPK9_Mbib-`;cX@A*FWet*yJ zInRCW=eZn?4*Jx5t2qn?`xNz6z%dwX6A9AcrbbX?`|tLL8r$%Y$UuX^0Li)KsfI_H z*CDljcXoE3WM!X#x~3P7p(9|BY3&+%9*Km(Oz5b9Bazt~6e9UCdAn8M7&vfTtAnq= zI4h1OCrvuuG*vB_29(F_WBV)#eZY(=8NqeY2Rv0PXC*-RM)RnG_GQ>eLMU9QoMX%mhyP*#Fa0#!^47hJ@ zd77MKI9eKi(PDU!V~ScJA-P0qGC0B*>+|sVVlq9vtQpKcDXM6eOCo}AAdF(V5C_M( zs>QI?#jYlv1fcaz)k*o&w7VV-3OVB7WtkbVRTiZXvwx(Mhy}TjozRn`REdD*g&b@3 zcF-%Ir^~<*M_}Z`6&-Hd#5}i9y1UR%=*s>~U`oDj?`-MlV#V2XXojz5g?#^})gc2T z+VM30il7Ns_^mI$tE`?q8NZOoliucww>fL#%$UgBfLNI>Sw;M-^lCzR+w1)yTZ!?D_fn)CCb-QD5ZwmW!zvO;h=Wj)F+BP<#3?_PKPc4Jkc+HfqRRPu~)| zB+-=;|a2JKmyl-imo(5J@PYyIb||LH`CrT4Lb8jD3= zKeElhjE#EPEDhe{+G~Oav3AWg#q5+MgfA)?QQ_&0+Wr%BMUm+*NitgIMtahCKR~u9 z(%YfNp8~;32i2S4^g+^IP^Kp|C7;zY#w?>)K7UqKpHb~e#VdCWGI{jJDZg~$Q0@^e z-(|{pzIudoxrSvd6R=U*yNo4}H$=kcObX;1jTg(%jHs;qusm>_BXVI41pGh?hyGi<&7yIU9 z&YCtm3HnyE_odQt%s7rYnx%IB0~2P;VZRf@PCs z)XI?HV(cWLR`8KOdSab_KDc0My(_$~(`*n3hq|R|8O_|(+7i&|6QnRQHn;XBYfRQ$HXmIs@8N5MK*iSvfeJ%$BLpuf8bjb(PYl%k9}`zU z8`vLu>SjY@$iV&^XKpqiLkBKwbb{vWpO3ry-{+?eeps%FpFcYd@U}E$3a6Kia>Qx- za28q%6aGw}B4O8D@IBI~TcRA^#I(c}{`An#_5(oz$jyxPDTBaVb#~)a3JUiw2BW9B zVknbrKLJ{Z!EM8_nZWG1wEQ0%Y_ENx#tyBtm?=?`qfo}b8cCyG^G=r-Bqs-9_vBU! z-%Iyx&kOtetfeI;dV(p|179LLaQI_gFBXwPiPuah75zhuB!f$?JD2$22rxrqWLr!e zH|lDJR$tOBy&>kd8yT9!)7pM=C=+AZ=A!uJe3!`jaY>M#lNrA=_pC29Dr@hm!+W?o zv{;uR>=_0GUb(V+a!TAB*>2c~66COgt({;hv(jXdqltxJWaW)rJSm1 zyFSYZ`_h&Fc^*#XWhU8ib==sPXa?;mb$mNmmvC+fm`ka0g=as~a{X*fu1y{Ip + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper.html b/docs/generated-html/structnv__std_1_1is__base__of__helper.html new file mode 100644 index 0000000000..e2675e2dbb --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper.html @@ -0,0 +1,264 @@ + + + + + + + +Cutlass: nv_std::is_base_of_helper< BaseT, DerivedT > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT > Struct Template Reference
    +
    +
    + +

    Helper for std::is_base_of. +

    + +

    #include <nv_std.h>

    + + + + +

    +Classes

    struct  dummy
     
    + + + + + +

    +Public Types

    typedef char(& yes)[1]
     
    typedef char(& no)[2]
     
    + + + + + + +

    +Static Public Member Functions

    template<typename T >
    static CUTLASS_HOST_DEVICE yes check (DerivedT *, T)
     
    static CUTLASS_HOST_DEVICE no check (BaseT *, int)
     
    + + + +

    +Static Public Attributes

    static const bool value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
     
    +

    Member Typedef Documentation

    + +

    ◆ no

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& nv_std::is_base_of_helper< BaseT, DerivedT >::no)[2]
    +
    + +
    +
    + +

    ◆ yes

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + +
    typedef char(& nv_std::is_base_of_helper< BaseT, DerivedT >::yes)[1]
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ check() [1/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE yes nv_std::is_base_of_helper< BaseT, DerivedT >::check (DerivedT * ,
     
    )
    +
    +static
    +
    + +
    +
    + +

    ◆ check() [2/2]

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    static CUTLASS_HOST_DEVICE no nv_std::is_base_of_helper< BaseT, DerivedT >::check (BaseT * ,
    int  
    )
    +
    +static
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ value

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    + + + + + +
    + + + + +
    const bool nv_std::is_base_of_helper< BaseT, DerivedT >::value = sizeof(check(dummy<BaseT, DerivedT>(), int())) == sizeof(yes)
    +
    +static
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html new file mode 100644 index 0000000000..2989581580 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html new file mode 100644 index 0000000000..5d927f04af --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__base__of__helper_1_1dummy.html @@ -0,0 +1,146 @@ + + + + + + + +Cutlass: nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    + + + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE operator B* () const
     
    CUTLASS_HOST_DEVICE operator D* ()
     
    +

    Member Function Documentation

    + +

    ◆ operator B*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator B* () const
    +
    + +
    +
    + +

    ◆ operator D*()

    + +
    +
    +
    +template<typename BaseT , typename DerivedT >
    +
    +template<typename B , typename D >
    + + + + + + + +
    CUTLASS_HOST_DEVICE nv_std::is_base_of_helper< BaseT, DerivedT >::dummy< B, D >::operator D* ()
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point-members.html b/docs/generated-html/structnv__std_1_1is__floating__point-members.html new file mode 100644 index 0000000000..4897d96fdc --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__floating__point-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_floating_point< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point.html b/docs/generated-html/structnv__std_1_1is__floating__point.html new file mode 100644 index 0000000000..0d4eacf066 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__floating__point.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_floating_point< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_floating_point< T > Struct Template Reference
    +
    +
    + +

    std::is_floating_point +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_floating_point< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_same< float, remove_cv< T >::type >::value||is_same< double, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__floating__point.png b/docs/generated-html/structnv__std_1_1is__floating__point.png new file mode 100644 index 0000000000000000000000000000000000000000..b8e015f6cf90e5f630d0b4875fd5a15e4cfc7e6f GIT binary patch literal 1588 zcmc&!dpOf;9RDrkQet&DO6I|&>1K`yk(6x8Xy(#Mn@eQ5Op{FRu{tiN7?I7TC(M~7 z5+;pV;Ve-R$6a&Vp=UPtA$I1R^XEC|-}64t`~7_0@ArM)=kq@A_wyz@qwJNH_9+1X zpo~PEMFYS#k(4_t$Vek%z3PD!G@LJB>?9J2l!}&qr*p#7rR>kiX0tu2NN&=l!c8>F z1(0sq8a?kY#sDDq26^^0h6EDi3$n)yRqf0S0z1}E%`fS}U2S&P)T|q}c{?3SP=eh` zX}ZvDl8dJv_`$JzQDB6mWI*+hqd${#q;STIr_V)nsd|u*#uO7-bu1+fs@!bF$yqdr zRvd*gd{)M$9+wqqLdz#AXbAL~=?k&eRA}(U3T^%M9fb-KOUATPTvp_0qVy#a2mjgam^v8=Z31#n>b^TX3b*lr*-EH=Sf`Gbl1MV-T6?JWexl|g}kv!@V zFc_oCvhvkDo!FnB9&T};EbK|uZK>5}ULuz7M=J(LxVtp>AM}j`H-KrAvxJ3IN|oQo zYjl6}Skt5?$VKznMiJj>Pt5cH*jnom{A%WX8J-zxPWPE$KFT1pdg9}i;y&B#si!@i z9eh@CjcXI}5~Adw_R!Y{k+`wj#I-@6xgg^oc~4zqFxNwBJtYu@(U>+`9S)5*-yK_O zauUO{oV@mQc6cwTfrx1^#0TTug-3mLFs%6Ro6UXfl2~?M8>r({& z^Wwf6n# zXXaE-yBQ{Guf*kzTd_;fx;rj<7u$uJfjGSV6!G`fx=TkC@ zC5Qg%`h|+ZmY(OKP(8iI`hzE;TKD0^^R4D?sd z2T6d8Xv0GqjTCyd*uc&Nvb+%>H5q6pTk0`sT2>wMzoo@84zQLibPB8_i*Rq7m-IFf zzW=H$_H7>fGK7|tjiNYdt?0*UJ#5SQD$fRnlF9O7aE-|3I)8^14+P<^rWp2h2wW1D~?Pg!|cv9;A3*|G;XIT(8^X z>-8M7xcOr7-On<yX&k42YUbzP~GrB|A|lj29;Y-tO7VH^^C zQGDrxe3A;MLf+<3G$ddc1lg)}t8E$%*nK0oZ$Fc~ZKIz$y;7#Uy;bz5VbwO9v2gR^`NjT~Rz`5b68>dg5lJ80`Qr h$?|c4O}4M9RPTCNerF8WCH>+7q#f#Pg{{|}KLC=|;L-p9 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__fundamental-members.html b/docs/generated-html/structnv__std_1_1is__fundamental-members.html new file mode 100644 index 0000000000..df714c513f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__fundamental-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_fundamental< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__fundamental.html b/docs/generated-html/structnv__std_1_1is__fundamental.html new file mode 100644 index 0000000000..943eed3771 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__fundamental.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_fundamental< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_fundamental< T > Struct Template Reference
    +
    +
    + +

    std::is_fundamental +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_fundamental< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_arithmetic< T >::value||is_void< T >::value||is_same< nullptr_t, remove_cv< T >::type >::value)>
    static const bool value
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__fundamental.png b/docs/generated-html/structnv__std_1_1is__fundamental.png new file mode 100644 index 0000000000000000000000000000000000000000..1571b366681eb1323581fab628a1910062fe23f0 GIT binary patch literal 1662 zcmc&#c~p{F6#rZ*v&5Xn#$~j_5i7SabIAqF#05=FMKn#zEzPCH1x?hk#mqf%p)xYl zYTO`At<=xRoXpHHDjY&>amWx5mjnk?=BsmN{+s!C-Z}T(`|i8HckX$=ckjzT;O(ZS zs-p@3fEv~v;|BnW0?0m~q6E#U$3}RN(mJpo=OUNOA#--|%k93@OOW+l=yZBeYdRiU zs-*aN`vXwWxAFEl&H@0|s$nrsxO4>}g;T{b-n6E0IQUPw4_cqkI)s>8Vj3y%tu&*Q8hJ)_RuR#Q?1<4Xs6$)*6F)j)k zmNxI~t3(7GLOTd?<6qXUzjDZHKE2Zt+h|K9Rh1$_nV!p;o#h@Aj&%q%UyRN{Wvuk# zf@F0jbMb*xmKL@3#dJ11PuZ3nO3KKzpL^w!vB)I7nI9}7r3{OPvh|nlMA1$N+8nJ1 zX>PhQn&{6BnPL5uEFOqHjSjMRkRLv-8O}n-lM^vToAzztJ*^G+BlS!HJY<0Tes`1E zvfas=WNi*!9I?Ix&KY}KY*%yJ$9K^FsHrL*?5MA+H0kY#3@EW6f1Fe$zZ&}bT(hC* zL53(R@hX>Ge|A)JdPv`6RKKEdX8YkjQ0Ua{dtQcZ3_fj74ZDKZet2kBD&*s8QcX6O z&~zKMsjQflpEGQ{=Z$1zs7T*8pIVTfAv^ckfN{B;s{}o%>#Y!@hMH-xFr3+%Ng803N6c zzGDw57l0pe;t@#r&JVm7mda`*$9_3Q@*Cu)aMfw!g;HH#+g?YfdpCw}$d2}8^5f*D zky`?h(LR0&PS=xC3|M?gNgk%0VXxT;E01Rfo*~1r1&O8Ide!j$1V8Mmo;8`QVhx(D_b6 zvxoMsPoustLLdn9LaJos6A1`P}_|knAVtId9d|! za)ypObBA-AC$oB4^Y!YjmaKV7C@H(ls*Ew;;3mH;AVnddl6rLUKMKrQd@b49b}Aw1 z<>?4uBb_$Cjt*$$Txk^mYmX3Cms^F9PBOL6QMgRW<9F)OffvQm>&yYUQnMZ8(;tEz z0d5bpDQV^KzC$v;hyFkAS$=&532S6n?&q=FS1QkD^n56w7D?>b9w=KeVV63(RopyL3#Jir)bB#)3hV-zxrq-KA649cBg2TLVh!UU zJ!(%j;2cEA(dcSJQc++Q-eQMNm)Vm}7-y2p7=6FT_`0GG3$`~l%rYnKZwl!g2kSAK zI( + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral.html b/docs/generated-html/structnv__std_1_1is__integral.html new file mode 100644 index 0000000000..ab22727ffe --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral.html @@ -0,0 +1,126 @@ + + + + + + + +Cutlass: nv_std::is_integral< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< T > Struct Template Reference
    +
    +
    + +

    std::is_integral +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > +nv_std::is_integral< const T > +nv_std::is_integral< const volatile T > +nv_std::is_integral< volatile T > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral.png b/docs/generated-html/structnv__std_1_1is__integral.png new file mode 100644 index 0000000000000000000000000000000000000000..103543a566dc2fe2279e23ece38bf1a5f84b57fe GIT binary patch literal 2010 zcmc&#dr(tX9u6=(#30*gEeNbkXXEmchbWZ-LJ?ZM!UPmdKp+BzU;<$=O9T=FSQS?B z(OwV?j|j9T0|^fU!V*YAiIkOqu*55Z4FLrMK_FCrt#7qXr0@Yzfpmqpc)~ zh2o91)@Db%JVU;q`a1CF!88|=R{8MzC@5(SMRMb${N9N>!>8ia6}7_^wI{Uqn*t9x zok0VgP4~c#39R!%-`!_}e`D{60upY~9Yhj%nnQ_#EcH>!8d*N)DqZj?BQmR}pK&Zx zrhc?5iuv4={N49G-_39;@#-VN%8!p zle&Z!jX(e^LZ7ZCcNL+q_c9C}R_<6a-@ikX$cX95rby?Sp$Hzh5@9$ZB&i>eNXrqCam%kw;o;~6?=P$!w>B?ot*{a`1E1y4Y z9ED4xfbY-$r@!>8E+aG?@bcCQ4IV7z1VPUNi!238_Un>Pkmw-1CE zofZ|R8@!se6O;ETGkvjbi~*H4AEb;4j|618CTHO?)bSRUANz;9a$%h zHBs9{;PR$rb`EFqK$LjR7~9%O)2)S>jGcmc+!(G3p7b!$HQDI{U=gY@rk_OjbEdvD zF-O93nIpycm@{D}KN!^&tM*X{aOfx;zY+cg3}Ht0zkylDRMzK%_>L*0E%(Z4R*SWR z1?zMBXJ5wKn8k#O`=45f9pJT;?0(Mv?sRgTS-KxG(j4w`L}tQXywp^ml1#juVp9S6 z`0PG}j3oJRFY(|nd<%Ekl=wws#Qm}3s=coZnDQrhreb>~b{sbJ+Co_97`!u0+n~YI z?GyExm&?=QhLxy2cXc&1(1TkzioF1>FBGm{UcSZC#SL z2LP1?-$Nlc4gdkGJgO$ZLwmmFfoQo)p(tCm@6I67l-2+CSedkF@vm!n`77h96C6Wo z?$C}p)`fF&#RIQD3kHt>LjfgqPO@c!+#p5-c4n|~|K6G2 zK4CS-fXj^_F(&S4D_>gnXv`$4YWwmZU%6n`vdaq>uNHfIv!!z~*#T02X>)dCnYNJs zSyk(gfqg=fT7G>shkiKV*Bl zg;KOzwtdT6_QXyxzq?06m#Fy0M8`0jf5>Ai$u`+I^w6WHa?zys?A7QRHmz3AhKhXn zF9HiEPrZMx?DqVp3_~q5QPjiQ!aqu?q^syD(G(7Cx74uo+%qy{i|!tBPVIcHb&2P= zBLOOvq)z$ED?{#n`)`w#%n2bk2###mNlIE05uW7y*2R4sSi0{Ovyg#h(5- literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html new file mode 100644 index 0000000000..4d478293b2 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html new file mode 100644 index 0000000000..fbe4e2793e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..dd7231fb6b8ed075d6c58b39e5464f010426ca63 GIT binary patch literal 828 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~o%VEb45?szJNMzFPYOJ){M(K9{a3EP z)#Nbi+O_rX-no?R$>UWl@bLI=x?aI-+SD7Mrg75S9q*sSh=D!I_+afq? zf~>1`sq&}fi_f3=GIU$+mOi6$)qm2f<$o{UIOQO@QvKeI$tiYwE!zvXZS9J=u*O|} z$?0YNFK1Nmxc6{pOL6Poz1J3&m3(e7Gcf-iFQ=R4SO0u_|IZ)BGp=rFz8N#g%xUGN za>MBfVqaL7oxJI0rn+=%T;i{<1&(In6Zn?(UW`3{chTFZ8~5&RY1|uSS8l%hcI}hA zS)W(rre-dTyKOMntbF%6t+3inV!3@E=G1PvJgazx^o{phCViQ2^^I%t67G^bL;2!; zC*E!Jn56#Ms@l9qMfTFk1=CYMJ6lXDX>eChv$aBQ)cbXJXzkoEj#mec`@_n z&E=Q1XWz<{7TTXN*KhXj7gg$8y^1fc)3ZIG82jlSONG1r_Aj$%6w6menpMZ$>lS)o y{A+K{K0DQh?k}rdxjz`gLhD7}%pk9aU-{p2>esSQ5C{b(bp}sYKbLh*2~7atrG>8m literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html new file mode 100644 index 0000000000..6005eff67e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< const T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html new file mode 100644 index 0000000000..83e32731fa --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< const T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< const T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..fa5f4449d1d265817df05f1a24f14c891ab27aef GIT binary patch literal 1170 zcmeAS@N?(olHy`uVBq!ia0vp^uYtIOgBeH~Zh0dNq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?!gD0;d$hEy=Vo%?puAq5^+{_V#5{wuHl z=Iqq9_1Y_|39{>=ojoj7j_l_-Y*3=a=K0F$pqA%b%?Q<%wr>-CkJ^7un5S|wv-s1( z$;KhgBF92*PFGo5+ES{1t~}!G=e%nsvKN$!Gctc^e6p%MZ~XYk8-qH(WlCq>81L56 z&S4SW{)q3|JXJk9pj$|dTl-PrldvBc)BfY;^(%> zDaY>~5175@TBP=~NgGOb>|a#0He32!aM`zqhF2rIx2);G^F zkFz|}k9~R!b5|%|JH1r?MqkAG6?YGOJ0evpvA~s8+o75(tl^u;8U|n(Gu{f`z_ev~ z0&9l(BRlO}cmW{FMzV zwGr^d7aQ50DDg|+!1q$d7cR0JC_7T+`j%~*`n4B_8FaOX2tDNB{SdfxN|3q| zW645TaNkRv8-Bm}N5;?J&#J%WCsjVY6Uq4PRZdvLu5+oinP;`vgfE|cebI0J&lP(f zJ5LO+zOy~^wWZ%#?qlaqZqaH#TKFw`ny#>Xifs9t2;nCa5AZ$?o${Tg*xyP+gO+Q_)^AU)hM!IU9Mx9-{p#cVYXuiy&$}1?eOvyW@Oshre^awo zY-Rb$_xRU`J6YFf7u4Rk_M!c^!GV*%`nT^pXJs~HdFbY+p}!qje{A`u3Jhef2PJ>Q nd6{dXVQDQkGT3V+|6j&FySOD+%IA~;3lIiRS3j3^P6 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< const volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html new file mode 100644 index 0000000000..cea9430bae --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< const volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< const volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< const volatile T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01const_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..10bfdda5d8093ab1da2ee6e550942c6907efee90 GIT binary patch literal 1219 zcma)6do+{@6d!ri&RO(U9&3wX8&+N;A%n8|glaKHdWfiaN}5# z4jKUSPDx(iC?_bdLK2;dX}dksWe4CMb$11nm+ATUkSgE$gP!mnUXJze!I*(69}dy6 zxQ{}aW=Tm67hGoUaDz4j=;P+p{_rRLG^0Y4P6nCYN?eNJfswtCg=1Gy#WJQio8C+gK`VDSoa3R`{__KB)56a3U)Cx~{e}wk_`nBfF+`B>i z`e`QedwJ{q-ePITBX{l zAzjjZ5*j!^m(ow*9c}Awe^_zy#pPvgh`uoQg3qcsqca@<>xS~ojDbn03k@uW(Ohxy z^hOVkrT9SN0IV{YZbRspSiiHFCe9jFjXVvNt0^{BcOeH$RE!+rH6et2V@p%je>I+Y zaV*`*X#hwc{Gx~@9`*QG0J~@$yK<(ZKo2CYB!x+HDzI~l5V#xGfM)mRR_Jy7QM7cV zV=0RLfpcVXGk{&s)5CW2h~N1d_AyTJH5%B4Cit9OKQQAGYXS;j4Cg^9t;nclsTZzB zoN!BQ_=Bjq2b&pu9_};??Df1kTtF#JRWna^)VV--no%J0xrY)G0+N?cCuEwxupn-L2r$H)8*slqJ!zC(I{pKoL*wX!+?v}M@R}tiDJ>FQYt{@A) zTs^^*^w`XsI^6%?9DZe)>6eFdC8sG3rhJt^XbFc)XeL$DAG?(YCL<$-Ek3@V-4(X2 znj1Ml0}VfUZ`F$9eHt5pD_K|ik62-?8KYXiEer7M<8ZPxR#@{q*usCYC2Y*W-mOF? z2o_UU-v7$e-sea&ZqoO+@UF8xEl?39v!>jlXg7Yw**RBXOdYOL0%I>1OoY@eXtUgJ ztQ~lrId3n)Q$J1C?p40>QZvq$PbrTr7R^ zd^ahsHeak!SKV!u+Pj@diesl>xxtC|#WT|i?D<3zTZ-O};AcT)>S*Teq4>}18#M@f qN-N8hSrGd5PH%O-Iz$;x*;H`Z2t;y|l2YX#2LU*HoTzgOy892+)pHa8 literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html new file mode 100644 index 0000000000..976aa62687 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html new file mode 100644 index 0000000000..8035a7e0fe --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< int > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< int >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01int_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..a3aa3bbcb34c27275af1cb10cdaacbdd8e330170 GIT binary patch literal 812 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~?e=tW45?szJNMzF%?doO{MTpJ{&)Wu zC8{VGyR~e;->JUss~woyTRu!rlxX{rG-1h$S2HGwxXK1kTJ(Y=&FKHC>92ez%_*}s zXW!YjZ1S#;7j&i@@SpoQafav1%!yOy+T6SRC3%r$@4V_`eU%4(Ser_kf1Xk4v2BHI zh#s$N?yiSFEnjY`czz(c@33szq*IfZe7YhZ8vX8sZR+vW-&A%UH#?svbvml@Tt;0> zY0{#b$8t=fEvyq}E-Gw^&en}BegBLvHz9AG{r;*mmVeUj*ggAvK5^PI*)y*uT#+l9 z7I$>coXQLF2C-+%7HMj({VJViEuYu%jB8Wl%i0-_cg)_htmW-o-}gJFo_>CAndM#g zbIWe`2Zb#Yx}_z(?$Wud=TeTY@|(6~uC2~BckQ*e+c!1*_0sm#e|1At&C~qFos`@& z9~I2EyH1KKx_&d%d{T*r`rLbBXU!jaT;knjoT>WK`KqC+W|!4671xW*#S(3bdl>#j z*)x3nDm%m8KK|fO*%|z&@*n&WIun=4_-I8klS|-nmX1YzoC2=0!U|nhY7Qc$J`EbL zW-tU^F=SkXrr?3Dm*&zRN{lYyFiqu2=jMsE-z)xkZ|>z%xh+gLu2~y0-m+@0yug(Z zz1&cq&A)ri(@(Pc4?Z4sDVB-Zr~bO~$qiHKp8dg>t!7OXJ-|BmxXkj>8JwpkAF#cu zaXE&k$lWkvM$go@Ocu+yFXkkh&Ej4Ba=Xvgz`Re=JvHd11CeY)?$j_C=gaw%TdUD$Bon^UIUnp4nbezjLC_*xSthcSAQdMQi5T z4Oh7HLT+hhUgzEv#voqud@pB#YyF0%>juZIKD}0Z- + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html new file mode 100644 index 0000000000..393aff41c3 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..34a3098a5c1f228c24181e422fa16471f07229cd GIT binary patch literal 810 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~?euhU45?szJNM$I)e1bW;%dK z%+isXbpL~{Ld%j0_^T8jMyfY0ppUs(M`-&sf z!#Qwwo=ct1uAFzTPWea73W9SXCxPp~cPPh9sn@8T@6ZRO^*55jck@1FgvOn&F= zcQ#(pGovzA=kD;cF1HRAU2PZHRo1tmLjLWRcQU8gzFDtT`5Sn=?vTn$rn%8;dM)Fg zJlp6IgjkoczHzbHx<;tkfKUVcMYaY6e5#6+^~FE0UR90*|wFEb8MF zaFrES=t5I)Z_$*XE`1?~5}2kE-e)q&oKKUdz2Cih=Sgni4{|;Y+xE$S&il3DY*by@ z@%i^Y9!)7W(Yvqy{$|mfS+d8@XRc_RsoukVz;fvRH0Gd!aM!*g6D>Fz1Nxb@6@|BX~N2q>@zkg+y1e9 z=6NySFt#tIZ~G?BC0qAt&HmQ + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html new file mode 100644 index 0000000000..58992abd0b --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< long long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< long long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01long_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..9fde52ca0fc86b41f98b721e8d1ce388400b5ca7 GIT binary patch literal 829 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~o$+*W45?szJNNCR#R>wh{MT1i{&&CU z;<~_9<=9N$(i@-7N&RBx<9!>yEOXYMZqG@3vJ<=}iSzn+ep)1VCg9KXy(#&VQm&ZD zSeSe>b#zEJ7xtIfNPbl5mg z+qB?>Gry6&`dzVnp@=g#?KtPnuL#LfUkd^8)@a zL(izrWVJnLVrso&*^&*{CDZ=e+zB`MppOsru6QYwaPGmu!2@B4vyB zow%AiamlPR&);N8dA_>T?U}#&nf^nMOT4>2U+}uro&^sn_Gc!(ldBp3Y^`JX^V-^= zzHVRBXKMrbDfbWjRGqQ-GSiQX2UtEVYUF(2$}D`Kn@z0&7{CnrQZpFhObi(JWF#m0ph%d@*W&gXjH-}kM4yUlFVmNQY2Ir|Of zmc5<1`{d`eT~F7P-kK}8VBzZfz75*D3~tNHU#>f|vwzN<-fiJeojETA89!;QsgFAQ xIhIM|3eU^)-> + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html new file mode 100644 index 0000000000..1879b40c50 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< short > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< short >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01short_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..204a67b005592253811cebc30a117344f8bff2af GIT binary patch literal 824 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~9rtu`45?szJNMzF%?doO{MTph`>$ND z+vMO9{x_#wJ@xqQs{%~z4nIzt_)e}j^qf@A>*M)pQKP13TnUS@$^TW;U-?ezDU&vz z+p|!5iJ6?#Q=w-E4gaa61yp=JamsPt`^0~ln=GrBPrETe=F$6{tV1gEpG>O0BDiV- zuj}1XX6*5lGpO=a|EX!xMc$DgPsyo#-ttUjnep1tCp~Glw-kKM ze$S9C=Du#F<6P2LkiTx}%DlTV3wh6+{rNn+d*-G4Ki($q`^S@J_%8}%k!c4^XUnFUfd7dnS~E@v#B+R^ZGF8OU+=2GcjP?laatwaq$4l zheeH?4_wg{+_JJ;Ulm^_%d!9U_nE9byS+DuDN%Q~RrJaK9%i?XnxtQz^d1=MOLFI(%{@~- z&$s%`v|R?%&*Vj|x>=@`tF`!b=a$nGwBK6pv3~E{p#AH?iR%W*vOfz$zwfCMS3NNM xOZLq@C8wEVF6HM%HSlM`LPtG~LvhD-`^4Y+ZAu>>Gy#)2gQu&X%Q~loCIHYzh0Xu~ literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html new file mode 100644 index 0000000000..50ae470f58 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< signed char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html new file mode 100644 index 0000000000..a376fd7e57 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< signed char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< signed char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< signed char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01signed_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..655781c81b1f29c016a8e6066a637dbe48d8dac1 GIT binary patch literal 878 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~{q=Nl45?szJ2!FiVg;Ty`MC67|LZ?( z)pAPOa4YwFnTGkkYO|ID&Q5<$nfM0(Pnxvko~eQAN^xEv&sVPL29tlTuUdIe<>bqP z4KJVYBu=V}gqCA)?371jHnmY;m~XtnH2_OSM!?~{xY!{%RAJ--lcuGgF@pYh$1EJhROa zn0!{k*dW^C^#)zRn1>nbQ&-64&I@4Pyz~E`SBBTWNbgTRbKJf?(uaM5>*uYzCfk3Xxm)r) z@YbnIPK&oaw0e8%txxEiUn!cmOb?m;&RJgiJix5rPNwS1cB}84lb4vkxRqr1tY+fj z-W7k#lJK-z+UJRfq;By3Si5QV?*r%LZ~i=* zQe39DfBMbU&uzB&&G@gkELJe*wXR>bkF|aMv(tR$?df+6pY54+*e75f%e$3(dUpDp zU`{<$X2*Fd+3CvX?a6oFZs0L5kiOYt$hn2$TJY1 zcKnWb?#Y8eyS}DvI+=I;=)Iz}XY4b&o2_5|kYb8BIz!K0kAH0rQ>K2sRpGzpn#kbA z%?uH;Nw-pV{eE0t9 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned char > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html new file mode 100644 index 0000000000..9c2697414a --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned char > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned char > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned char >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01char_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..f2abe7324d7f2eabf45af769f045d7488db95a3c GIT binary patch literal 883 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~GkLl=hEy=Vo%?apZXF(1{_8XM{a3DE z+cZI9?dx0jw>#f_SZ3OCKvCd->a1le>kU1F-b>AxbfueZnabBi#xqX-$(Ea??pb8@ z=1yr*n}J8Rjp=FCGn0$|c%E7EMJh0)$!7lLPsy8hRy?1#Q`!2={_c}4C+%)7?a5F! zQRH>CE>ZuKeW~=LUc>P@cV*KiWqAkXzV%MlmR-91T}870r0i>R4zrxT<)W8y?=YL; z%^pMM&8?gHUW*(Q?YkDW`_{d?vbPT2iLI~vGQ;xkvG)?6zdkou{gPqxPL*T~P`GsJlK5A5)O{x9T{y4SZTP%f zCa+t?wP^n4)rl%Er_@P&*_Kns;SQ9Y+a5OQ-wJRLHEMe9ebH&@d-=*E)A2Ysm-40{O*$On39e$^Lp0LBJX z_2n5ibH9E&{OWh&0pZQEJ9q56thuX)Q{eUX84T6C*gyaLR;3%Yf6eFXrF8~3=gfWm z+sLZ=Tx{imKf7l%m9A4RJ|J^*#_f!!+40wRTfHf=K4ZDGp93lOsGuGKqQoxp%)l zeX;Gb_~gij`~5S{yKx(cvu}U@&TQY&63ZgXd#BYVY_mG^>|7?tr&M3-uTONBna`4b ze3GAkt!C!KyIaJshc}v+7qfr9bGPSGymqd$ko>I$m!4(q&~453D9$i{cGoLfwq_$k zmi6DeEISsRewZJ7o$*=CXY-frKGvVO?4~fhFRfH?SaLq?^IttimR&Xi3Kx#^>mK;O k9v0A5Nz0}LegDNj@4fziMv)icz#PKh>FVdQ&MBb@0G|l1$p8QV literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html new file mode 100644 index 0000000000..7490e1c693 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned int > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html new file mode 100644 index 0000000000..3bd2d1195e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned int > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned int > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned int >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01int_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..592a80064efd0ce28980383dc5ab811cc364e43b GIT binary patch literal 873 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~{qS^g45?szJNIGH76l$x{_8XM{a3Ev z&E>_kYu(nrR*_bB*0Kp%usn)imO1NDx96lq*$G}jBD_AHU5kuoocyy|Zk4)ck=2_! zvQK#KDs26DMQ6H!|GAIqhLbN{Uc7>((q{S7<0-|GbE}{A*(Cgn^-=SYo7^taZ{;L< zDd5UF>xuIxuj?*nw0UN8Cs8dk+w)e`-=Ldb?k{ha?b)P$(lq;AqSfgvC2rJ~eF8zLmxE4Lo-_zW2R$e2#um zd9=z(_3xIurQ9a@y*#^Md+O)*0+p9+cPlegU-oCgLyG;GiEnTPT3I*>ZD7H6_>t$l5_t!%Zt~$XE0P}-Cdtx*W;gk z@mJ$q`%RylPd>A$sE*(B`NU3b>ofbiPx{XCt-P1@?P#j$^^3P|GL&68KIg7%TGLw9 zg0)hqXJ$LUydiUKt7taU{hpnD*UqFlGlkwhnI^wE*{o;TeWR;>M^lo`t1lSue!2I& z-V4EHuXWr{CU0vmyZ!a)i)%BEri4BIzW99l;XcfVYJH_EN(@9QN;=WN_|=~>ZDzg=x6 z?b{5?fzeUX;P$@GoN?WYPdeXq-kUzF`5d2f?c<7b?LrT>>RB^M{Pi-6e=p?VK3$ga ra^33-On=;AA^PQjkLJ=3zt~s4(0{4NA$kj#FBm*s{an^LB{Ts5uhOJE literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html new file mode 100644 index 0000000000..dffcd7d1d4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html new file mode 100644 index 0000000000..ea0a7215dd --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..b28e466d7fb204c541a3893e1235a8172e0f887b GIT binary patch literal 876 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~{q}Tm45?szJNM(H-8ww3{MTpJ{&)Wu z#oE{qa`)T!w@cm>?3VI)AkguDlg{Z!@j8>9#0m#b+LMvwHA#H#L7uDoA1$nyq_Q~i z`9p=|7i^ln%hPtYRu=WycS?%Plk?O(@z^rQKGbac#f|B2JD9KCKO558nfz$NlR!5O zr!bMFuN|K}U-J8rFGKfEKJyulr*u7bPStPKEnPBi&*N5Gk7ZY%Ni5sEMy<$cv+^s2 zP_y4NWQ%#j`EM+L5%cg~RB`yt>K)GPGumJK_lp@X+b?B)tmdEOnH;GEvFS^CCiuOK zJ)=8=wYD*{sBD6-=fbPnXZ}@IIGahI;9J%k7<>HT!nIyEUgX_6uy=FK+q|&tzt7w? z`91OUsW7MN)WadSBHpeFzIf$iO-6Fd-QPJ&O@B`iH(0l2(iiW!zgs*nNtfhqvU^rF zarbQ{PurR2Z{9VSRN^Jxyia$w{Go|U%wNoH51aIFh1xQe%8N&)sOVQE=$+oQL;S%0 zYy1uMwd~K%pARpnV}I8CN!nsROL15fgZ-=-3~?rgjKF|psy~R&WC9m(@{4jUZ&Es!oXdkP3q}CT3 zBfI-%^Sqlr_HS|=f6q8Jr*G2H9RA#UH{QOOHsfT7?!kBN^Vj#4F;tYJ*)bBo0(xsuF0y;3t1}GzV1uz z=THCqRgbZx + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned long long > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html new file mode 100644 index 0000000000..bd4b65a1ca --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned long long > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned long long > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned long long >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01long_01long_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..edf19e6983397a16baae5dc1af891829a25dd2fc GIT binary patch literal 887 zcmV--1Bm>IP)Xefde7_Wtyfbo|q&lng+HcMbW^P zq$s+Aoy>pLOZ6s6PF?5WAZKVMsg0**`3UyOIsXgnr1Z?NYPCMz)AGlh_A)wqW=9?{ z|6NQbO&*hntPypFD&>AzK7-BtoOD~fOCYIw_Skc{!%k9pl2o4hM`c?btEZeuWlxfO ztU0G?ENho}MfD?WlJ04@u^VO!YX@voHd;Vy?Zs7l4|nVU`*8gp{0KXji;3%FKmR%G zV(MU5m#WUNR`p%%CD{@9No;qf2d6D5?ez+~T`wOv?L+7ew$EvNpI-h zWPR+pF?6?0l6lKjEaNxxhJTauz^U`tXIJ+P-~n!*X- zU+D)Ro?>GE{DkC=iT$9C$Q=}WbaH?=8rT3)G_V1pXkY_G(ZB|XqJa$%MFSfkiUu}7 z6b)>EC>q!RQ8cgtqG(_PMA5(o_^V1OrFbF%MAN_qh@ycF5JhLO^N4Mrk0`&pxlZhF zZ^-I#@0ad(twy>F+zu_E>Cp+1JwZ*!vPW$z)0W@?yup7~KezV8{eyIjFKokvZfGB!kr<77S{Q>fLGlxMfu6_Uj N002ovPDHLkV1h|WwI2Wg literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html new file mode 100644 index 0000000000..42071ca69f --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< unsigned short > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html new file mode 100644 index 0000000000..d9550a3aa4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_integral< unsigned short > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< unsigned short > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< unsigned short >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01unsigned_01short_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..109c6d717f6072d8dc1185ba4f87b89da6b5936c GIT binary patch literal 885 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~vv|5VhEy=Vo%?XoW(6Ks{_8Vq|GWQ- zVpTPaczr9sOvC!#x;6m|hePqpGH3k~pRDqAU8APw-i!pVAbqRmnP>LDT3R((W%=P_ z%O)GQIA%Rw8K{>yPv)_IlG@Vr{zXDhKF*sazj=0ddEQLvO^5&Np5Zg&&E#0Gek&)@ zO95A6r6dcG&OG{sG@O`l3lGi2u(sx{wmq>rPmtgp8&WVSc zJc3lp=f?6XdoKTycHwr}XJ?B^CC%?_woLlsZfc;qQk>Vv^OYC+%C*+}ibTe;eHEJlGeL6aC=ks~S;Zg=_hSj8VJTKY#rAM=MJI+Rvi7@n`bR zoxA=0l6Cbt-<_aHxnr_Pvp>3bUrFTI(k-QayT3j6ZoZs!Zli2k(_7C6^G=`;Xta< + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_integral< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..3546a2d616 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::is_integral< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_integral< volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_integral< volatile T >:
    +
    +
    + + +nv_std::is_integral< T > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__integral_3_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..7709eeaf8bc3e7a876d89c4c3f5d243ec5792cb8 GIT binary patch literal 1188 zcmeAS@N?(olHy`uVBq!ia0vp^uYtIOgBeH~Zh0dNq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?!g7<;-nhEy=VoqM}zwE>UobaUgM|NZSW z&ApEa@Tq1V3p+krC~}{Ux7d1~!v-ZvY@V;24r+P6)r?SGX*)O3_uc$T!~97hFE$ik zUuh7cUU)3z=5&?Z($=pk=h7q2{?5E+B6~rpI3u%a#}2=H#piDrTKAkk`DEG5+O+AZ zEpre0ybO4AVeib%H!Hn2>2B_0Va=XYq0 z1k>_k=QgOkt=~9HavG<(h<;>tAS7dqllH;W}?gX13o@u4yx&O*a{#7b> z7jk!BjXZuj)>hhc)yf}lV&}R|@_Tt|$@ZgPeSIdCOn-MK(eqat>uQy)%M+$9`SvA6 zwk`36GsFDr%nO7|kNm3m!hBosweVKy8}lL-^D%E(k-(bae~|0Kye82FwyfF?)m&i> z-$d3h0K=Q{R`3RR3K?4ti?PCqNZsjO*!f95iK&tI+g z&S`m;{$+o?lT-Dj$MuNIpO-xUdRDG3=kZJXr;oCfKV8nXJ-)8;z>hgT@kgxQ?MUzq z%JbBk^|9!gO~T6)%x4s5ZeM*P{BczJ+8MGdUVl$0yO6Yr`R;GiTk#wHJfCk5NWA;3 zpzqm~lhaBKMIBN%PFnK+%G@=sTe>pd9}Nw<_3HPdSLFeBt_%G)Xpdd{``GWOxT9P5 z`LJ%4QgDt~c{rFa|I<_b*Beysu91#VdFf%kp^x=hYLaYasQrpbCF}PouUp^mHK}g( z`jd&CRhyeaCjkS(E2z3^6W?KjFNzK4Uo&3lv`VVA`PKNg<7?+F-W>lmf$fY@A()|s zH#i@x_0nAG=fd>D2^Ri;n@Z(&^y#ag+Rs1T;^xnq*|)w4GZna*Z(_PqDx7`xXMNfK zuCF)$Ki-gGd*NfiadGkR>J!NlJ$boC z{&n4sU!L8+qyB5Xxx0(&wOaT8Py42Y7p~R!E?vWxaW{P_-+9)poBlE7zy2`e#rt)K z8O&cl=8j8}J$>kOjOC|QH35uvi~Wy({Tj%)$Gv{SVTSreurya^cS&>UmDYcZ(z0 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer.html b/docs/generated-html/structnv__std_1_1is__pointer.html new file mode 100644 index 0000000000..e51ffc1b34 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: nv_std::is_pointer< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer< T > Struct Template Reference
    +
    +
    + +

    std::is_pointer +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer< T >:
    +
    +
    + + +nv_std::is_pointer_helper< remove_cv< T >::type > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer.png b/docs/generated-html/structnv__std_1_1is__pointer.png new file mode 100644 index 0000000000000000000000000000000000000000..930d7ca52b8e424657068d5dcc1a822c3ab886f9 GIT binary patch literal 1479 zcmeAS@N?(olHy`uVBq!ia0y~yVAKV&J2;quWb15SQy?V~;1lBd|Nnm=^TXE{%MO)J z0J6aNz<~o18!N(qT#k|;zhDNSA`rNGD#VC^fi=w2#WAFU@$KA;MUS-vSZ6zj{`)_9 zeX?hpqlewMWBbm%Tk^uCk#S*KLwMF&)0JDpm{+N&PAZwcMb%R;(|c0PrD-ZVm*{#H z{S=H={qs_8m%68vMXn8xl-9E5mQzMEr>oc=^v;>-bT4^zb=~5%n{+O@Z7)duDO!}> z85g~dH~YZX??Iw%k;&I4taZ0{GJjGR^_b)EiM3WP-j|eZ5?;Sb*burTtm}%`fB*39 zra8K~7kB?s;#Cd%ep}lw+q`IY!Ngxh-IYgfEAKSjZo0CsFX=$;q{)wSznxsWGT&Cj zcDtxp*>=a8c>dc_+k$E??_B$S{=u--RvWLr{ZrSH``R{TwREC%!FSO~CGGZGO;5kA z&GXc%es^5b*6x43?sTEzld%0R+1FJ+>@Hmy@mtro_I>6ToiEoGy2d~L zeeZS7FT0JtLE7xxJL>M%8BO|iDM$Bn$9s(+ZC5F;^f-$pwIum7j;%u8qdCEEMug$LfoO{D0NBr(T39CKAs$Wyh%j&uF zp8xpgHz%%P^TF*tH}X=R`%PJ*J~RIGq?C{CcU51y&saZoN!YV(>)P*Jd8c#g_npIr z^{1wpXL`;mn$bTqdXh-y_YM`=55n72Ufx$gPCB3%(%S~ktJi)r)F{L=a5gyJVlw#o z%PW>a4wzJ)C~226MgSAb{1Pf97R)sGY2`LAO;ydMBFtP4Oa*AE@~3^y+Wb9xf7Gz8 z{c4f7dm}^KN%b2{1r~2VOgmpObM5H^_m%VSOnNYj<9hpq=-Q+;vC|K{zqT~#?RJy5 z%l@9ujon|gA#}l1Cg+)Dvd+~z=RUq6`+#+i!S-j{)UG;SUHAG;W+#8tq0g&p_TIYo zp=sOpwqUz=N0OsoC+<_b^U$wm;(GJjQpGB1bEZdcf3V+R`=fN8=hrkoytp&#{gM6e z?@0A8$-S;yp0xbIAHSUTzh&x73;(>c7JT4om+$s|YpLy=_wOd?zKY+@`e5=lhB|xH ltXH%#)JxM-bLpMu_FbLjIkPJ4Q-P%}gQu&X%Q~loCIHL&#%%xq literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html b/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html new file mode 100644 index 0000000000..b1d6a93db6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer_helper< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper.html b/docs/generated-html/structnv__std_1_1is__pointer__helper.html new file mode 100644 index 0000000000..cac3fa6088 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_pointer_helper< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer_helper< T > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (false specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer_helper< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper.png b/docs/generated-html/structnv__std_1_1is__pointer__helper.png new file mode 100644 index 0000000000000000000000000000000000000000..ac03b2671ade2e15cdb336f5a65f20169b47e320 GIT binary patch literal 827 zcmeAS@N?(olHy`uVBq!ia0vp^uYov#gBeJkzgZLrq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~o$_>X45?szJNIqTYXu$`{?{}2{a3z! zOy2R(Cg&jay{uL_uMQ{`J80C;T$cID#b?r$R<_kDTbCcy@_f6p&2(q|vx7FCK?^S* zbPm2K$W}O4Gh({R+pk>Ns^`)p&K8?yN10_zzPV*f&5q*OJFidJY>;|(e(CAfNZZw^ zvlEqPOyE1}XWKPr_uS}7xid@EG$JNXiJnxnIo{{K*@oUHyykyBwPwBF*7lXK2JXPU;l%XXppop`~R*BJ*ZPxQ&<1)q7Pad$VeVr+IdHbX8pYS=&;Tf5os zvdLG)hV!qv7YLUg`StZGe~$Z_`z!7q_;$qVm4QRI)Eb7jnH!k4EI-I~VIG^dLp33R z1JP51yykYWTxf-y%npymHV)yT;NAh$Oa>*AfqGJ^k9?K=-f27pud+sl1L+mlJQi8Y=$F;FH(M!@pT{f$iP8^}${%x&JcT+%=eYa{a%1z?9D5>FVdQ&MBb@0O%N)Qvd(} literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html new file mode 100644 index 0000000000..269823619e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_pointer_helper< T * > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html new file mode 100644 index 0000000000..0d3db72263 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_pointer_helper< T * > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_pointer_helper< T * > Struct Template Reference
    +
    +
    + +

    Helper for std::is_pointer (true specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_pointer_helper< T * >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png b/docs/generated-html/structnv__std_1_1is__pointer__helper_3_01T_01_5_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..200380c63ee1bc7cff3a4aae82b889514a874259 GIT binary patch literal 834 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~UG#Ku45?szJNIJIV=Vy}{()`-m#Y;U}%eR`A5zoSZ?_e>2{Pj;&nbjk*4wQq}(QTPor0S zX1>c7OR2nl{60!@av(8ne@X-ny%k_x;3O zliw3hoeFcxPCXoQE8=ZeaN(-SFE1S9IR56MSN1nW-39TPsxN0t{ciER#JlU;CaY&v z6Ib6>@-&@!{^ssPl}r!u=6lo5${(7zM7rep!l_H*S9NN7#+iVEOKt`*^xm<4sDI6U zV80#n=hv^5E$o>;JO1RHBj04Ht=k~)>%*WwYX$>QIpdy;B&LdsM_4{A>g0UjDlB}U zTTQJ&98JNW4kwO-`#BhZ(E-&EJ)`iH;bwWI-fOj~?@vE{v$Xh1y}a6ij=X~=(&lUz z%52@{Iq&Y<8}q%mR-?@1?R&#gV>|cleHQuBr_#gkn!c{oX1jdlU`%|50rL{E16y8& zWN+Plsqp!=8aLC~Ki!yJU#D*Fi+rgk;UApspXxO0U5c*6tnvm>#W8Bd#k($Le)6QQ0ddmG&j@-30Gj|1dgSUd3@0O|N-`$qo zzAbC*y|acVPS5ywL2jy9bin>g%f7yU`ne`&)>JW#TVL}ZMBn#qn72>zlY5xSx4Sp% z7-ZkB+gNIr{Lk8_q44j`9Hu|1uyFbEN2Fb#sr)Z{NxS~099y=lzy#0W>FVdQ&MBb@ E06N` + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_same< A, B > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same.html b/docs/generated-html/structnv__std_1_1is__same.html new file mode 100644 index 0000000000..ebad17d282 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_same< A, B > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_same< A, B > Struct Template Reference
    +
    +
    + +

    std::is_same (false specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_same< A, B >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same.png b/docs/generated-html/structnv__std_1_1is__same.png new file mode 100644 index 0000000000000000000000000000000000000000..1417d3494d0d828ffae98dd872630bb74072c543 GIT binary patch literal 794 zcmeAS@N?(olHy`uVBq!ia0vp^uYov#gBeJkzgZLrq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~t@Lzp45?szJNIqgVFeyne)CKB{!iTg zl>7DsGw;o-nAGy7HxycKEBtTdd-+P|43(AKykVYM{>`D2Ze5nV_T=A8_RlJwj{M^M zy@y<`cFy*hTI?CLQhSYh;l0CI^ZYk2Q+1f_bK%mbho5#utk?d0D9fN;FZkTqtj)7> ztk~+1ilwTy3P#uTzSR#T2*Sy$?P>#913P!e{mxAV!P>^s@scy zN9~XOci{bw^;_lF-)Ud{SLMsYt-QN_Y3y2|X`cPg>DLba+10OX=Da*S>B}O&zYDxB z$?p1)T6*ngQFyYlr)lf_y{q$7UQStOu~&Ef|061%~CzQ%sV-2>l_Sp70^=$2Z;@HTS;)0X82xh~9O({`vP zByeEA*3v0ff}9J0vC;svgz?tpy=zvlzxz~s-{E^pCo;uc`@e}PCdB5(pKr4sbFZ#^POy1IhO(wh-+(|FF7J(D$f>>M#&x#}8Ar3!~~)Y>%fm`%&i$S6rhtPlOX zbkDU&?aO>$EX<~IX+-|5-Ro36*EHwuE2~X%R|=YJo|V`CS|loJ`ltNX?TF+_HQN@8 za+*JSdAhgp>)Fa)_rmX{kFLMVmYi4nT=-h~{p*kH-kzP|BIjOvBO>%yLE700`ES3u z3O482+*`e-c*lIUKXXJKuFReJ6Bj=%>QN>IJg8nr~xKq22WQ% Jmvv4FO#l(sipKx| literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html new file mode 100644 index 0000000000..0ac28c8d3d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_same< A, A > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html new file mode 100644 index 0000000000..7a5fc109e3 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_same< A, A > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_same< A, A > Struct Template Reference
    +
    +
    + +

    std::is_same (true specialization) +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_same< A, A >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png b/docs/generated-html/structnv__std_1_1is__same_3_01A_00_01A_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..0d9a64a1af3b1400801967863ad1570b7fe50009 GIT binary patch literal 786 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~E%bD845?szJNM$I)e1bW;%dK z%vE7gsCV^w7pphLCM_qF1mYyy5s3I6;LylE4HxP!8OdZ%^EZd6}ek;XqMb7>`C@XTvkMJ}7=QYR?Q zE)#6sv$aofE$a=GIeynSddh~qn=_fOfBUp^bL5*<-_^=M9Nh;}Fl(W`&u>2T|9`z<#nX4P z@9%$Hc;eZMIn`(DE|+gESIGV^Upy7Z(!x!kRL|h)>gTe~ HDWM4f67_Sj literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html b/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html new file mode 100644 index 0000000000..4fabae90ac --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__trivially__copyable-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_trivially_copyable< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable.html b/docs/generated-html/structnv__std_1_1is__trivially__copyable.html new file mode 100644 index 0000000000..d8af9cc39e --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__trivially__copyable.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: nv_std::is_trivially_copyable< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_trivially_copyable< T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_trivially_copyable< T >:
    +
    +
    + + +nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)> + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    typedef bool value_type
     
    typedef integral_constant< bool, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< bool,(is_fundamental< T >::value||is_pointer< T >::value)>
    static const bool value
     
    +

    Detailed Description

    +

    template<typename T>
    +struct nv_std::is_trivially_copyable< T >

    + +

    std::is_trivially_copyable

    +

    This implementation only evaluates true if T is fundamental or pointer

    +

    Without help from partial template specializations provided by the user for a specific class or struct, this trait will never report that the specified class or struct is trivially-copyable ; this is always safe, if possibly sub-optimal.

    +

    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__trivially__copyable.png b/docs/generated-html/structnv__std_1_1is__trivially__copyable.png new file mode 100644 index 0000000000000000000000000000000000000000..464a678d11e4779e550d2ebd96efc0d83727d608 GIT binary patch literal 1280 zcmb_cZB&v66n>p9UDF*;vPx83ODij9DVQ)*^JqefIx~@6(8tj%AwLMLgp{^i=FAOK z%TPP{(cwf=($O?c)>4j6F(+!7d~Bj`gsAk&3{cqHer|tv&$;(L_nzmT`{O?6mL$f< zxU5C41pvT>fR9Q706P%IiB9%#L}U@Y;j}R^E;-s}v%zrD_@?<8^CuYn;}VG^_2Ib` zxO6(36n_waNgr%%AUO~K9Lfn%`;yPu>1%a)?{Lo1)%h7_YdL#ciCZjd-R&8+z_r5V z`3CZMtEMMz6y4N(swT3Oz)RvGQv1ogKX+hbli0o|30Fm$odko?2|RwWCVVKIkcXR_ z^3ouS_ahU`rUv)BEpv3ipf>?0&-8#c6TZISb4`R(tEhjh99>Zkn~s%JW>+fcp=g8c z>hyrA3aL7Bh{&2MA0^Vmh8wEu3Z#K*>)t)?LBHKd$JdS=&WuCmzZS^etsBf3poxsR zkg%n7a3MwC73K^fg}5xax{L9|U1~sMyje#FI}AcmSdg~1y7@lqB}DUjSiEM4o+C^f zZ!Np~x|=K&1~R;{L8+>j6muV{42fIVzgx@|m9EISmH6u66SD7S-En5S*NZjA?k0=%~$0R1pC|N{w_D>>Tnrvs_~c~aWS?oLUU-*?Ov?t(kdFG*%Sq+S#OTxYWAijR>Os}Iv<>6CXSdPXbc^4NoW6Vb z(*kAzqm31iex8!PV|4!8bE24ZG1l1_C6s0B<|gRvW&1pR``=(3ryf;`eO42@OLui zd+UU0j$|I{^e6ZcI_r31pI%a4Rpa{uG9H5ZK~TEB^EJ(W7~&{`4AeIV6d@uqeNPrG zn#~7Z03(%ryJxy$>^^%3Qr?>P4!-F{@E~A-+prMrpTo^7B8{^vwcq`Vr@WvDSeX9@ z!HkzRUFU6Yy8hx0&E_Vm1zMq!`qoLDqYA+*G|r2>jMQs*uHH|L zk}7h7ZMb`Sr|_p~w40MC|LI_f@$4(Ma?|g7EQhBWPe6>p^C7))jDs$TknK*RXy{eP zgj}iUROC^b1B`7E7JhC^?^LkT!!vG~HCF1XK8<9cwrFZ*V)&f>b-8B}@Q&e-yPEWf zch-Pj7RRU#>W$OaATarDkS8dm6SaC99-vdSv|-+n!GP3^kR3B-A7tC$doCKr9QsR&QPp* ozw_URnJ8oUH8mda^KG@-7lX%?$`bns_+0^n==i9X$fKA32A4dKM*si- literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__void-members.html b/docs/generated-html/structnv__std_1_1is__void-members.html new file mode 100644 index 0000000000..0ed9c5d5a6 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__void-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_void< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__void.html b/docs/generated-html/structnv__std_1_1is__void.html new file mode 100644 index 0000000000..908f72f017 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__void.html @@ -0,0 +1,124 @@ + + + + + + + +Cutlass: nv_std::is_void< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_void< T > Struct Template Reference
    +
    +
    + +

    std::is_void +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_void< T >:
    +
    +
    + + +nv_std::is_same< void, remove_cv< T >::type > +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__void.png b/docs/generated-html/structnv__std_1_1is__void.png new file mode 100644 index 0000000000000000000000000000000000000000..91bc2f5c2c5ecbb79c4635b71043eb7f5d063e58 GIT binary patch literal 1391 zcmeAS@N?(olHy`uVBq!ia0y~yV3Y*1J2;quoijLZ4{*`~Gq7N42f{iiq4>DDLFqO_HJ?JVshwCC)fzDOW# z?dGtXQ9Xif6N z;gYm{*@^1**(Pz*t+vbFJtvueOvGJ&3`MEMQzRJ6VvAZCNWGzOce ztOtOUOe!07sgIk_e7RSBvHCsdE)~&9A{xe%o*?5L$9=f%5BI9n&Xoz9G|w&CZqk$; z-)rLyl|9ctc1>K*R(He4Iy3GzZ?666bFchc=5Su#_$PANivH0|w_Zd%qceU|x|8;UVTn%9GZcYG?j zo0)cQ|HYmqf4S1$otwE*e%-SaHMT9%hd$aF_gkI+c+sx7*7WGHn@eW0Z+4nAv;Esl zoA&7(x6^jCeoDU_a60ej>{TUZM@#nD_%$9o_AzG@`$X$C+g3)nS6#oU;`yJa{(#q| z?is}nlbG(EsSMtvCcj?Q)4amwhG`^ta~_wZCC^oPe)^+-*5i^da#{ef2Rc1$ zxfXRXDw|Wvfl)xin4!;z`2m!w*gf-W>GbV#`|YmF`3E#7b;ZDk`zioc|kDbwc zm42i5bj^{AZgQu&X%Q~loCIILof3yGq literal 0 HcmV?d00001 diff --git a/docs/generated-html/structnv__std_1_1is__volatile-members.html b/docs/generated-html/structnv__std_1_1is__volatile-members.html new file mode 100644 index 0000000000..fc433c5aff --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile-members.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_volatile< T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile.html b/docs/generated-html/structnv__std_1_1is__volatile.html new file mode 100644 index 0000000000..a34d6b7044 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile.html @@ -0,0 +1,123 @@ + + + + + + + +Cutlass: nv_std::is_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_volatile< T > Struct Template Reference
    +
    +
    + +

    std::is_volatile +

    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_volatile< T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile.png b/docs/generated-html/structnv__std_1_1is__volatile.png new file mode 100644 index 0000000000000000000000000000000000000000..e10756055cbe1f8f78c3c2c7690f91411c8b0a4d GIT binary patch literal 786 zcmeAS@N?(olHy`uVBq!ia0vp^uYov#gBeJkzgZLrq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~E%bD845?szJNMC=|!Kfj*&)yCNG4EM43nI~q>yne=e zn(VbE!s#N_3hmC)YKVr_+|)dDAQztu}3D&BHVsPuI_f%?$o-G@JIh zadOJ-UB?2X_a9qxaD)06?rZxwE?HR>%-ghi_jQrYR@-x$inoSs@^gG~U3RmbJ!`g{ zRf4HyvaXKfZ{4uuM8h2mD^5ze9=*1H(xtHV5sy#ocy!$;h^r}f|FpK4F1ceHR_D6^ zluDm&|H1!B_LonmN-fr?yUNZxy`a0_SJd=j+VtJpw(Ab7`Oj@NE{&LU@5)N?RVsHE za@UJ(?mH=auUExYE3P{Htj8tISi`E>@%BelJj-96lbE*T{+e@Io?lmj0h>ST3}{RKKjdcJp4=w;OfyYQHRFd$4s%V1xOW%CLr7%WKWK<#RqMy-vPee1Fl` zf9~=1XZNJ@1i9yS%%n_U3xSY=aoJ zhi#?BzK#YK!r__slXbW02S#q1cHHct{qdZpJI9_gt*F>FtAgQv=$T!;hgLkBvL=ux zzoD)%{;yJTGB?9pMw#c5P=U n!~CG{$bNoqZL@UBtPA!(=Ns5aGYHHBCSV3nS3j3^P6 + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::is_volatile< volatile T > Member List
    +
    + + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..1cec586abe --- /dev/null +++ b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: nv_std::is_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::is_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    #include <nv_std.h>

    +
    +Inheritance diagram for nv_std::is_volatile< volatile T >:
    +
    +
    + + +nv_std::integral_constant< value_t, V > + +
    + + + + + + + + + + + + + + + +

    +Additional Inherited Members

    - Public Types inherited from nv_std::integral_constant< value_t, V >
    typedef value_t value_type
     
    typedef integral_constant< value_t, V > type
     
    - Public Member Functions inherited from nv_std::integral_constant< value_t, V >
    CUTLASS_HOST_DEVICE operator value_type () const
     
    CUTLASS_HOST_DEVICE const value_type operator() () const
     
    - Static Public Attributes inherited from nv_std::integral_constant< value_t, V >
    static const value_t value = V
     
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png b/docs/generated-html/structnv__std_1_1is__volatile_3_01volatile_01T_01_4.png new file mode 100644 index 0000000000000000000000000000000000000000..ee469288617792a569026083c3a6defa1b9b5204 GIT binary patch literal 801 zcmeAS@N?(olHy`uVBq!ia0vp^&wx09gBeI#ssDTkq$C1-LR|m<{|{t-`1)emq0$LJ z78oBma3Er1MHrCFQ4-`A%m7pb0#{Fk7%?y~ZSZt)45?szJNNCR)d~Wvvy-px`>%Z8 z#Wlg~)i-JX;7WG8q{66f{t{Ip2!Oh8R)bB>CqTYLBJ z*=JlXd)nRnI<4Qp`u<0C!{$S`m9(rXdk=pKbSb{McikE8*KI$GXKLM9Q+-YMqSE&X zJ9RXQF3X;OKRvJAoKNT3p1TQbnzoK!{{Q^V#Kj+Z?fRHw=kZK)dANCRa_BteS$6}i zG#A}Gmg6(M;Az`i(O#jvxu&{LSMN68(!8_!pT(9L*+05#kI#5--#l{}pTTCOleR~6 z{)!YAraAf>_?{_Sq^Wtu>Tu5U!@L*QC`iw^<{sGB?isyh!{S?8`l5H%tTpR?^=_qa zvHZ%lI=Wk~n)xh`-5onoG<3d?=kBT-YpSi+?rbkE0ca&{=U+yQu=by<=cyXs>%Rm%hlGZ{0#($QlqA4+?PXhmSq;{H{8F*-%xMM z{Q3EF!8zv-)CAf!epZ~Z_%hRviw9UfENbL@;L0p~pqovt0T{px`cg9(;!F$}_hck6 zRiG(25H8}x@z0gv!#bEIyJInD&0Z&)XMQz}l5235y?um_t6<%21IBNmGCME7x$^AL z$$rE5)$gCKc&qgM^c;3qlh3E`zM0gXvE6N1-%;1uzYT(SnFM7pss`9Rop?FiP-{+} z`~HOFL$z)-VrOIDm)#QcFMaZ=?Ul%jZ7Ua7GTmy;EIT@7(~PIP#b(#Mp46x*_$+#oHHJY=Qx(Cm$_N_O5v+gXn+5Hv1(Pf5?+xd1Z zji|kudVS93lI@q{w(MMcnQ6xb)-yZ*oxi*|Tl_$NmJ3o~RQ> + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::less< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::less< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const nv_std::less< T > inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1less.html b/docs/generated-html/structnv__std_1_1less.html new file mode 100644 index 0000000000..83b22cddd4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1less.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::less< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::less< T > Struct Template Reference
    +
    +
    + +

    std::less +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr bool operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr bool nv_std::less< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1nullptr__t.html b/docs/generated-html/structnv__std_1_1nullptr__t.html new file mode 100644 index 0000000000..09279c72a2 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1nullptr__t.html @@ -0,0 +1,95 @@ + + + + + + + +Cutlass: nv_std::nullptr_t Struct Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::nullptr_t Struct Reference
    +
    +
    + +

    std::nullptr_t +

    + +

    #include <nv_std.h>

    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1plus-members.html b/docs/generated-html/structnv__std_1_1plus-members.html new file mode 100644 index 0000000000..4470e45ddc --- /dev/null +++ b/docs/generated-html/structnv__std_1_1plus-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::plus< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::plus< T >, including all inherited members.

    + + +
    operator()(const T &lhs, const T &rhs) const nv_std::plus< T > inline
    + + + + diff --git a/docs/generated-html/structnv__std_1_1plus.html b/docs/generated-html/structnv__std_1_1plus.html new file mode 100644 index 0000000000..929e75d620 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1plus.html @@ -0,0 +1,143 @@ + + + + + + + +Cutlass: nv_std::plus< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::plus< T > Struct Template Reference
    +
    +
    + +

    std::plus +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Member Functions

    CUTLASS_HOST_DEVICE constexpr T operator() (const T &lhs, const T &rhs) const
     
    +

    Member Function Documentation

    + +

    ◆ operator()()

    + +
    +
    +
    +template<typename T >
    + + + + + +
    + + + + + + + + + + + + + + + + + + +
    CUTLASS_HOST_DEVICE constexpr T nv_std::plus< T >::operator() (const T & lhs,
    const T & rhs 
    ) const
    +
    +inline
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const-members.html b/docs/generated-html/structnv__std_1_1remove__const-members.html new file mode 100644 index 0000000000..b9647b8b30 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_const< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_const< T >, including all inherited members.

    + + +
    type typedef nv_std::remove_const< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const.html b/docs/generated-html/structnv__std_1_1remove__const.html new file mode 100644 index 0000000000..b371ed23f4 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_const< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_const< T > Struct Template Reference
    +
    +
    + +

    std::remove_const (non-const specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T nv_std::remove_const< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html new file mode 100644 index 0000000000..4bee0ceb09 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_const< const T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_const< const T >, including all inherited members.

    + + +
    type typedef nv_std::remove_const< const T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html new file mode 100644 index 0000000000..b33f95d46c --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__const_3_01const_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_const< const T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_const< const T > Struct Template Reference
    +
    +
    + +

    std::remove_const (const specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T nv_std::remove_const< const T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__cv-members.html b/docs/generated-html/structnv__std_1_1remove__cv-members.html new file mode 100644 index 0000000000..740ac2981d --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__cv-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_cv< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_cv< T >, including all inherited members.

    + + +
    type typedef nv_std::remove_cv< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__cv.html b/docs/generated-html/structnv__std_1_1remove__cv.html new file mode 100644 index 0000000000..58a19ab905 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__cv.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_cv< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_cv< T > Struct Template Reference
    +
    +
    + +

    std::remove_cv +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef remove_volatile< typename remove_const< T >::type >::type type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef remove_volatile<typename remove_const<T>::type>::type nv_std::remove_cv< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile-members.html b/docs/generated-html/structnv__std_1_1remove__volatile-members.html new file mode 100644 index 0000000000..9bb367d554 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_volatile< T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_volatile< T >, including all inherited members.

    + + +
    type typedef nv_std::remove_volatile< T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile.html b/docs/generated-html/structnv__std_1_1remove__volatile.html new file mode 100644 index 0000000000..a77e3a17d5 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_volatile< T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_volatile< T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (non-volatile specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T>
    + + + + +
    typedef T nv_std::remove_volatile< T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html new file mode 100644 index 0000000000..1d84101af0 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4-members.html @@ -0,0 +1,91 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    nv_std::remove_volatile< volatile T > Member List
    +
    +
    + +

    This is the complete list of members for nv_std::remove_volatile< volatile T >, including all inherited members.

    + + +
    type typedef nv_std::remove_volatile< volatile T >
    + + + + diff --git a/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html new file mode 100644 index 0000000000..1b8cc6b095 --- /dev/null +++ b/docs/generated-html/structnv__std_1_1remove__volatile_3_01volatile_01T_01_4.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: nv_std::remove_volatile< volatile T > Struct Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    nv_std::remove_volatile< volatile T > Struct Template Reference
    +
    +
    + +

    std::remove_volatile (volatile specialization) +

    + +

    #include <nv_std.h>

    + + + + +

    +Public Types

    typedef T type
     
    +

    Member Typedef Documentation

    + +

    ◆ type

    + +
    +
    +
    +template<typename T >
    + + + + +
    typedef T nv_std::remove_volatile< volatile T >::type
    +
    + +
    +
    +
    The documentation for this struct was generated from the following file: +
    + + + + diff --git a/docs/generated-html/sync_off.png b/docs/generated-html/sync_off.png new file mode 100644 index 0000000000000000000000000000000000000000..205d0717c643fd351126924977dc77eee5c0f94a GIT binary patch literal 855 zcmV-d1E~CoP)4?!{bHLqDlVHJCIRNMNA$iy=jJO%d?`Lk6re?1nqk@iX5*abvIfxT zpU*+(#&up)m;;;?g#cF06gNOz$#w@oJZa*zuxWtBq7#TIHu4elkUom{G%who|W%xXO(Yf zh;cRgc2HBKgO84SL+t@-O>Me(G(e~~e|fvi`CggNc|f$}W%{TtY@yP-ft>W>NPCsH znUY-HJxpufW@mZXHkxY={%d?}4PXnUqcaO&M!i4j5+`OP09HPoXUc<0w7v`{=1Kxzx3pXZzpWBn|>vg z-H|!>)Z<^;F6&>HG7SFqHTV9y1~82$qm-@7kB92I+J5}~8wqg3{FfmeLBBb`qmkS4 z)+01kc9E5~Xr^R}d@mj4z=O;DGd6A4?$;5NOXZeunV&e4U96KJBTpYP^zbUcGzD1| zEGurK@BSBzKR3QB5G{=wKr6b*MK-8NHp{9=b+`i)lVfrBs1*4I!C-)?;7b6jAM~&? z0u+fmA`@T^X!>gc4;q`@Bk=*i966<(TTiqrLYLR;#SZXxJ{4yZooJtkZt(=*4QXWSwe`@jz65~Pw_=~roC2aH(Ft}}yeD*f+$CByufhk32K--) hioS5Cq4@@|dIhfwqgK<&XjlLM002ovPDHLkV1ii&nnVBq literal 0 HcmV?d00001 diff --git a/docs/generated-html/sync_on.png b/docs/generated-html/sync_on.png new file mode 100644 index 0000000000000000000000000000000000000000..e82391b338910855b2314df56f8366751326b518 GIT binary patch literal 846 zcmV-U1F`&xP)X5B>jX%wylXhpA^4{6XH+6 z7V#1^i64a@U=Gs6ZqX{f5k1z2ivVlv62H%*bL+KC{3H$~O3*0IzUH$5%*83u=}e&6 zPcJ~{#`S|JvnDtyiU8tCPRanV@wmMJiKL6;!len8iw*!M7q0N8vM*Ux7zD7%&Ghq8 zc^66UWPo_ubQGFki&zTSBE_3YW7Xb8fr1YRy_^BCnF&p?@|`-GYY&i>s^tOjKRY!+ zJz$HxtP-}?9%Ok=Ilyb>WE4e{_;M-$oVI9!I%`XyAV^d7elk<@X4{JMYFPJP8yf)qvMZAmMeE4o>nl=j4!Y)#;bPN@7LEjb(Dtt5LhB$;P=cA>~-HzdEMMnmq@Znj(hSe#o! 
z@cnjK|GJ!^|G}@^9lHdWk(=eGW$m}odtGHWAO0o@t^u|f)XMF1CKw$bkhdPEesw3= z>F>rZDr25>bl30q@b}ZWJ9eLrKtIndkqX$t>q+H9NizP=2!kVk0A>_smr=E%ncjQn zn0#)`DS*v-O`tcr$<1z1wrX~{Ln+=2&(!o&z>FlBgK#*+%kT?;c&_SUWh5vTx5W~G z1)!y`1>A3JcB5h+zydj@ojXsopG84zKL>bI-;0xpPPFetmzV&UCoQhS;%jT8`;!ja zR;1TJ?AX+@)pgI=y@UJ+}Rg-{Y!Txy18)!{>XU(*s|dWv_B|p6H=E7cBC*K_Rl!&mvb=dL$c?Kq)F4wjYCg-|9;S}OYq}??*>ndzm+Hbq;s{(Yn4(&Ad#BEhWeWzdv*+l@b~JG;3a`a8o8OC{&Tatx9yfZ!ck_KtN!z8_5Z!1N4|eAY}t_VRWi8!u36}T#e5g8 z3dU9~S^AtKd*@!&S66RM63L$Qye)g}0E&@uRA#KEKy; r$>L)Z_DY$=H_d&e9Lhhlli>h1,.sm>li>h2,.sm>li>h3,.sm>li>h4,.sm>li>h5,.sm>li>h6{margin:0;padding:0}.sm ul{display:none}.sm li,.sm a{position:relative}.sm a{display:block}.sm a.disabled{cursor:not-allowed}.sm:after{content:"\00a0";display:block;height:0;font:0/0 serif;clear:both;visibility:hidden;overflow:hidden}.sm,.sm *,.sm *:before,.sm *:after{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}#doc-content{overflow:auto;display:block;padding:0;margin:0;-webkit-overflow-scrolling:touch}.sm-dox{background-image:url("tab_b.png")}.sm-dox a,.sm-dox a:focus,.sm-dox a:hover,.sm-dox a:active{padding:0 12px;padding-right:43px;font-family:"Lucida Grande","Geneva","Helvetica",Arial,sans-serif;font-size:13px;font-weight:bold;line-height:36px;text-decoration:none;text-shadow:0 1px 1px rgba(255,255,255,0.9);color:#283a5d;outline:0}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a.current{color:#d23600}.sm-dox a.disabled{color:#bbb}.sm-dox a span.sub-arrow{position:absolute;top:50%;margin-top:-14px;left:auto;right:3px;width:28px;height:28px;overflow:hidden;font:bold 12px/28px monospace!important;text-align:center;text-shadow:none;background:rgba(255,255,255,0.5);-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox a.highlighted span.sub-arrow:before{display:block;content:'-'}.sm-dox>li:first-child>a,.sm-dox>li:first-child>:not(ul) a{-moz-border-radius:5px 
5px 0 0;-webkit-border-radius:5px;border-radius:5px 5px 0 0}.sm-dox>li:last-child>a,.sm-dox>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul{-moz-border-radius:0 0 5px 5px;-webkit-border-radius:0;border-radius:0 0 5px 5px}.sm-dox>li:last-child>a.highlighted,.sm-dox>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>a.highlighted,.sm-dox>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>ul>li:last-child>*:not(ul) a.highlighted{-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox ul{background:rgba(162,162,162,0.1)}.sm-dox ul a,.sm-dox ul a:focus,.sm-dox ul a:hover,.sm-dox ul 
a:active{font-size:12px;border-left:8px solid transparent;line-height:36px;text-shadow:none;background-color:white;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul ul a,.sm-dox ul ul a:hover,.sm-dox ul ul a:focus,.sm-dox ul ul a:active{border-left:16px solid transparent}.sm-dox ul ul ul a,.sm-dox ul ul ul a:hover,.sm-dox ul ul ul a:focus,.sm-dox ul ul ul a:active{border-left:24px solid transparent}.sm-dox ul ul ul ul a,.sm-dox ul ul ul ul a:hover,.sm-dox ul ul ul ul a:focus,.sm-dox ul ul ul ul a:active{border-left:32px solid transparent}.sm-dox ul ul ul ul ul a,.sm-dox ul ul ul ul ul a:hover,.sm-dox ul ul ul ul ul a:focus,.sm-dox ul ul ul ul ul a:active{border-left:40px solid transparent}@media(min-width:768px){.sm-dox ul{position:absolute;width:12em}.sm-dox li{float:left}.sm-dox.sm-rtl li{float:right}.sm-dox ul li,.sm-dox.sm-rtl ul li,.sm-dox.sm-vertical li{float:none}.sm-dox a{white-space:nowrap}.sm-dox ul a,.sm-dox.sm-vertical a{white-space:normal}.sm-dox .sm-nowrap>li>a,.sm-dox .sm-nowrap>li>:not(ul) a{white-space:nowrap}.sm-dox{padding:0 10px;background-image:url("tab_b.png");line-height:36px}.sm-dox a span.sub-arrow{top:50%;margin-top:-2px;right:12px;width:0;height:0;border-width:4px;border-style:solid dashed dashed dashed;border-color:#283a5d transparent transparent transparent;background:transparent;-moz-border-radius:0;-webkit-border-radius:0;border-radius:0}.sm-dox a,.sm-dox a:focus,.sm-dox a:active,.sm-dox a:hover,.sm-dox a.highlighted{padding:0 12px;background-image:url("tab_s.png");background-repeat:no-repeat;background-position:right;-moz-border-radius:0!important;-webkit-border-radius:0;border-radius:0!important}.sm-dox a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox a:hover span.sub-arrow{border-color:white transparent transparent transparent}.sm-dox 
a.has-submenu{padding-right:24px}.sm-dox li{border-top:0}.sm-dox>li>ul:before,.sm-dox>li>ul:after{content:'';position:absolute;top:-18px;left:30px;width:0;height:0;overflow:hidden;border-width:9px;border-style:dashed dashed solid dashed;border-color:transparent transparent #bbb transparent}.sm-dox>li>ul:after{top:-16px;left:31px;border-width:8px;border-color:transparent transparent #fff transparent}.sm-dox ul{border:1px solid #bbb;padding:5px 0;background:#fff;-moz-border-radius:5px!important;-webkit-border-radius:5px;border-radius:5px!important;-moz-box-shadow:0 5px 9px rgba(0,0,0,0.2);-webkit-box-shadow:0 5px 9px rgba(0,0,0,0.2);box-shadow:0 5px 9px rgba(0,0,0,0.2)}.sm-dox ul a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-color:transparent transparent transparent #555;border-style:dashed dashed dashed solid}.sm-dox ul a,.sm-dox ul a:hover,.sm-dox ul a:focus,.sm-dox ul a:active,.sm-dox ul a.highlighted{color:#555;background-image:none;border:0!important;color:#555;background-image:none}.sm-dox ul a:hover{background-image:url("tab_a.png");background-repeat:repeat-x;color:white;text-shadow:0 1px 1px black}.sm-dox ul a:hover span.sub-arrow{border-color:transparent transparent transparent white}.sm-dox span.scroll-up,.sm-dox span.scroll-down{position:absolute;display:none;visibility:hidden;overflow:hidden;background:#fff;height:36px}.sm-dox span.scroll-up:hover,.sm-dox span.scroll-down:hover{background:#eee}.sm-dox span.scroll-up:hover span.scroll-up-arrow,.sm-dox span.scroll-up:hover span.scroll-down-arrow{border-color:transparent transparent #d23600 transparent}.sm-dox span.scroll-down:hover span.scroll-down-arrow{border-color:#d23600 transparent transparent transparent}.sm-dox span.scroll-up-arrow,.sm-dox span.scroll-down-arrow{position:absolute;top:0;left:50%;margin-left:-6px;width:0;height:0;overflow:hidden;border-width:6px;border-style:dashed dashed solid dashed;border-color:transparent transparent #555 transparent}.sm-dox 
span.scroll-down-arrow{top:8px;border-style:solid dashed dashed dashed;border-color:#555 transparent transparent transparent}.sm-dox.sm-rtl a.has-submenu{padding-right:12px;padding-left:24px}.sm-dox.sm-rtl a span.sub-arrow{right:auto;left:12px}.sm-dox.sm-rtl.sm-vertical a.has-submenu{padding:10px 20px}.sm-dox.sm-rtl.sm-vertical a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-rtl>li>ul:before{left:auto;right:30px}.sm-dox.sm-rtl>li>ul:after{left:auto;right:31px}.sm-dox.sm-rtl ul a.has-submenu{padding:10px 20px!important}.sm-dox.sm-rtl ul a span.sub-arrow{right:auto;left:8px;border-style:dashed solid dashed dashed;border-color:transparent #555 transparent transparent}.sm-dox.sm-vertical{padding:10px 0;-moz-border-radius:5px;-webkit-border-radius:5px;border-radius:5px}.sm-dox.sm-vertical a{padding:10px 20px}.sm-dox.sm-vertical a:hover,.sm-dox.sm-vertical a:focus,.sm-dox.sm-vertical a:active,.sm-dox.sm-vertical a.highlighted{background:#fff}.sm-dox.sm-vertical a.disabled{background-image:url("tab_b.png")}.sm-dox.sm-vertical a span.sub-arrow{right:8px;top:50%;margin-top:-5px;border-width:5px;border-style:dashed dashed dashed solid;border-color:transparent transparent transparent #555}.sm-dox.sm-vertical>li>ul:before,.sm-dox.sm-vertical>li>ul:after{display:none}.sm-dox.sm-vertical ul a{padding:10px 20px}.sm-dox.sm-vertical ul a:hover,.sm-dox.sm-vertical ul a:focus,.sm-dox.sm-vertical ul a:active,.sm-dox.sm-vertical ul a.highlighted{background:#eee}.sm-dox.sm-vertical ul a.disabled{background:#fff}} \ No newline at end of file diff --git a/docs/generated-html/tensor__ref_8h.html b/docs/generated-html/tensor__ref_8h.html new file mode 100644 index 0000000000..14314c66ff --- /dev/null +++ b/docs/generated-html/tensor__ref_8h.html @@ -0,0 +1,111 @@ + + + + + + + +Cutlass: tensor_ref.h File Reference + + + + + + + + + + + +
    + +
    +
    tensor_ref.h File Reference
    +
    +
    + +

    Defines a structure containing strides, bounds, and a pointer to tensor data. +More...

    +
    #include <typeinfo>
    +#include <cutlass/coord.h>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/vector.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    class  cutlass::TensorRef< Storage_, Rank_ >
     Structure modeling a pointer and stride into a tensor. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tensor__ref_8h_source.html b/docs/generated-html/tensor__ref_8h_source.html new file mode 100644 index 0000000000..8031da7d32 --- /dev/null +++ b/docs/generated-html/tensor__ref_8h_source.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: tensor_ref.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tensor_ref.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <typeinfo>
    31 
    32 #include <cutlass/coord.h>
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/vector.h>
    35 
    36 namespace cutlass {
    37 
    39 
    41 template <typename Storage_, int Rank_>
    42 class TensorRef {
    43  public:
    45  typedef Storage_ Storage;
    46 
    48  static int const Rank = Rank_;
    49 
    50  private:
    51  //
    52  // Data members
    53  //
    54 
    56  Storage* ptr_;
    57 
    59  Coord<Rank> stride_;
    60 
    61  public:
    62  //
    63  // Methods
    64  //
    65 
    68  TensorRef() : ptr_(nullptr) {}
    69 
    72  TensorRef(Storage* ptr, Coord<Rank> stride) : ptr_(ptr), stride_(stride) {}
    73 
    76  void reset(Storage* ptr = nullptr, Coord<Rank> stride = Coord<Rank>(0)) {
    77  ptr_ = ptr;
    78  stride_ = stride;
    79  }
    80 
    82  template <typename T>
    84  Coord<Rank> converted_stride;
    85  for (int i = 0; i < Rank - 1; ++i) {
    86  converted_stride[i] = stride_[i] * Extent<Storage>::kValue / Extent<T>::kValue;
    87  }
    88  converted_stride[Rank - 1] = stride_[Rank - 1];
    89 
    90  return TensorRef<T, Rank>(reinterpret_cast<T*>(ptr_), converted_stride);
    91  }
    92 
    95  bool good() const { return ptr_ != nullptr; }
    96 
    99  Storage* data() const { return ptr_; }
    100 
    103  Coord<Rank> const& stride() const { return stride_; }
    104 
    107  int const& stride(int dim) const { return stride_.at(dim); }
    108 
    111  int leading_dim() const { return __NV_STD_MAX(stride_[1], stride_[2]); }
    112 
    115  long long offset(Coord<Rank> const& coord) const {
    116  return stride_.template dot<long long>(coord);
    117  }
    118 
    121  Storage& at(Coord<Rank> const& coord) const { return ptr_[offset(coord)]; }
    122 
    124  Storage& operator[](Coord<Rank> const& coord) const { return at(coord); }
    125 
    128  Storage& at(int idx) const { return ptr_[idx]; }
    129 
    131  Storage& operator[](int idx) const { return at(idx); }
    132 
    136  ptr_ += offset(b);
    137  return *this;
    138  }
    139 
    142  TensorRef operator+(Coord<Rank> const& b) const { return TensorRef(ptr_ + offset(b), stride_); }
    143 
    146  TensorRef operator-(Coord<Rank> const& b) const { return TensorRef(ptr_ - offset(b), stride_); }
    147 };
    148 
    150 
    151 } // namespace cutlass
    CUTLASS_HOST_DEVICE int const & stride(int dim) const
    Returns the stride of the tensor in the given dimension.
    Definition: tensor_ref.h:107
    +
    Storage & operator[](int idx) const
    Element-wise accessor.
    Definition: tensor_ref.h:131
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Storage & at(Coord< Rank > const &coord) const
    Returns a reference to the element at a given Coord.
    Definition: tensor_ref.h:121
    +
    CUTLASS_HOST_DEVICE TensorRef & advance(Coord< Rank > const &b)
    Adds an offset to the pointer.
    Definition: tensor_ref.h:135
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_ref.h:48
    +
    CUTLASS_HOST_DEVICE TensorRef operator+(Coord< Rank > const &b) const
    Returns a TensorRef offset by a given amount.
    Definition: tensor_ref.h:142
    +
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    +
    Storage_ Storage
    Data type of individual access.
    Definition: tensor_ref.h:45
    +
    CUTLASS_HOST_DEVICE TensorRef operator-(Coord< Rank > const &b) const
    Returns a TensorRef whose pointer is moved back by the offset of a given Coord.
    Definition: tensor_ref.h:146
    +
    #define __NV_STD_MAX(a, b)
    Select maximum(a, b)
    Definition: platform.h:155
    +
    CUTLASS_HOST_DEVICE int leading_dim() const
    Returns the maximum stride element as the &#39;leading dimension&#39;.
    Definition: tensor_ref.h:111
    +
    CUTLASS_HOST_DEVICE Storage * data() const
    Returns the pointer to referenced data.
    Definition: tensor_ref.h:99
    +
    CUTLASS_HOST_DEVICE TensorRef(Storage *ptr, Coord< Rank > stride)
    Constructs from a pointer and stride.
    Definition: tensor_ref.h:72
    +
    Storage & operator[](Coord< Rank > const &coord) const
    Element-wise accessor.
    Definition: tensor_ref.h:124
    +
    #define nullptr
    nullptr
    Definition: platform.h:136
    +
    CUTLASS_HOST_DEVICE long long offset(Coord< Rank > const &coord) const
    Computes the offset of an index from the origin of the tensor.
    Definition: tensor_ref.h:115
    +
    Structure modeling a pointer and stride into a tensor.
    Definition: tensor_ref.h:42
    +
    TensorRef< T, Rank > convert()
    Conversion function.
    Definition: tensor_ref.h:83
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorRef may be safely accessed.
    Definition: tensor_ref.h:95
    + +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    CUTLASS_HOST_DEVICE void reset(Storage *ptr=nullptr, Coord< Rank > stride=Coord< Rank >(0))
    Updates the pointer and stride of a TensorRef.
    Definition: tensor_ref.h:76
    +
    CUTLASS_HOST_DEVICE int & at()
    Gets the index of a given Coord element.
    Definition: coord.h:185
    +
    CUTLASS_HOST_DEVICE Coord< Rank > const & stride() const
    Returns the stride of the tensor.
    Definition: tensor_ref.h:103
    +
    Basic include for CUTLASS macros.
    +
    CUTLASS_HOST_DEVICE Storage & at(int idx) const
    Returns a reference to the element at a given linear index from the base pointer.
    Definition: tensor_ref.h:128
    +
    CUTLASS_HOST_DEVICE TensorRef()
    Default ctor.
    Definition: tensor_ref.h:68
    +
    Returns the extent of a scalar or vector.
    Definition: vector.h:161
    +
    + + + + diff --git a/docs/generated-html/tensor__view_8h.html b/docs/generated-html/tensor__view_8h.html new file mode 100644 index 0000000000..7fa3cfcf82 --- /dev/null +++ b/docs/generated-html/tensor__view_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: tensor_view.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tensor_view.h File Reference
    +
    +
    + +

    Defines a structure containing strides, bounds, and a pointer to tensor data. +More...

    +
    #include <cmath>
    +#include <cutlass/cutlass.h>
    +#include <cutlass/tensor_ref.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    class  cutlass::TensorView< T >
     Host-side reference implementation of tensor operations. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tensor__view_8h_source.html b/docs/generated-html/tensor__view_8h_source.html new file mode 100644 index 0000000000..655854021d --- /dev/null +++ b/docs/generated-html/tensor__view_8h_source.html @@ -0,0 +1,127 @@ + + + + + + + +Cutlass: tensor_view.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tensor_view.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cmath>
    32 
    33 #include <cutlass/cutlass.h>
    34 #include <cutlass/tensor_ref.h>
    35 
    36 namespace cutlass {
    37 
    39 
    41 template <typename T>
    42 class TensorView : public TensorRef<T, 4> {
    43  public:
    46 
    48  typedef Base TensorRef_t;
    49 
    52 
    54  static int const Rank = TensorRef_t::Rank;
    55 
    57  typedef int Offset_t;
    58 
    61 
    62  private:
    63  //
    64  // Data members
    65  //
    66 
    68  TensorRef_t ref_;
    69 
    71  Coord_t size_;
    72 
    73  public:
    74  //
    75  // Device and Host Methods
    76  //
    77 
    81 
    84  TensorView(TensorRef_t const& _ref, Coord_t const& _size) : Base(_ref), size_(_size) {}
    85 
    88  bool good() const { return ref().good(); }
    89 
    92  T* data() const { return ref().data(); }
    93 
    96  void reset(TensorRef_t const& _ref = TensorRef_t(0), Coord_t const& _size = Coord_t()) {
    97  Base::operator=(_ref);
    98  size_ = _size;
    99  }
    100 
    103  TensorRef_t& ref() { return *this; }
    104 
    108 
    111  TensorRef_t const& ref() const { return *this; }
    112 
    115  Coord_t const& size() const { return size_; }
    116 
    119  int size(int dim) const { return size_.at(dim); }
    120 
    123  Coord_t const& stride() const { return ref().stride(); }
    124 
    127  int const& stride(int dim) const { return ref().stride(dim); }
    128 
    131  TensorView& operator=(TensorView const& _tensor) {
    132  Base::operator=(_tensor._ref);
    133  size_ = _tensor.size_;
    134  return *this;
    135  }
    136 
    139  Offset_t offset(Coord_t const& coord) const { return ref().offset(coord); }
    140 
    143  bool contains(Coord_t const& coord) const {
    144  for (int dim = 0; dim < Rank; ++dim) {
    145  if (coord.at(dim) >= size_.at(dim)) {
    146  return false;
    147  }
    148  }
    149  return true;
    150  }
    151 
    154  T& at(Coord_t const& coord) const { return ref().at(coord); }
    155 
    157  T& operator[](Coord<Rank> const& coord) const { return at(coord); }
    158 
    161  T& at(Offset_t idx) const { return ref().at(idx); }
    162 
    165  TensorView<T> subview(Coord_t const& location, Coord_t size) const {
    166  return TensorView<T>(ref() + location, size.clamp(size_ - location));
    167  }
    168 };
    169 
    171 
    172 } // namespace cutlass
    CUTLASS_HOST_DEVICE TensorRef_t const & ref() const
    Accesses the tensor reference pointing to data.
    Definition: tensor_view.h:111
    +
    Definition: convert.h:33
    +
    Defines a structure containing strides, bounds, and a pointer to tensor data.
    +
    CUTLASS_HOST_DEVICE Storage & at(Coord< Rank > const &coord) const
    Returns a reference to the element at a given Coord.
    Definition: tensor_ref.h:121
    +
    int Offset_t
    Type used to compute the offset of an element to the base of a tensor.
    Definition: tensor_view.h:57
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_ref.h:48
    +
    CUTLASS_HOST_DEVICE TensorView()
    Default constructor.
    Definition: tensor_view.h:80
    +
    CUTLASS_HOST_DEVICE int size(int dim) const
    Accesses the size.
    Definition: tensor_view.h:119
    +
    CUTLASS_HOST_DEVICE Coord & clamp(Coord< N > const &max, Coord< N > const &min=Coord< N >())
    Clamps a coordinate to a range specified by maximum and minimum values.
    Definition: coord.h:219
    +
    Coord< Rank > Coord_t
    Coordinate into tensor.
    Definition: tensor_view.h:60
    +
    CUTLASS_HOST_DEVICE void reset(TensorRef_t const &_ref=TensorRef_t(0), Coord_t const &_size=Coord_t())
    Updates the reference and size of a TensorView object.
    Definition: tensor_view.h:96
    +
    CUTLASS_HOST_DEVICE bool contains(Coord_t const &coord) const
    Determines whether a location is within a tensor.
    Definition: tensor_view.h:143
    +
    CUTLASS_HOST_DEVICE int const & stride(int dim) const
    Accesses the stride.
    Definition: tensor_view.h:127
    +
    static int const Rank
    Rank of tensor.
    Definition: tensor_view.h:54
    +
    CUTLASS_HOST_DEVICE T & at(Offset_t idx) const
    Element-wise accessor.
    Definition: tensor_view.h:161
    +
    CUTLASS_HOST_DEVICE ConstTensorRef_t const_ref()
    Definition: tensor_view.h:107
    +
    CUTLASS_HOST_DEVICE Storage * data() const
    Returns the pointer to referenced data.
    Definition: tensor_ref.h:99
    +
    Host-side reference implementation of tensor operations.
    Definition: tensor_view.h:42
    +
    CUTLASS_HOST_DEVICE long long offset(Coord< Rank > const &coord) const
    Computes the offset of an index from the origin of the tensor.
    Definition: tensor_ref.h:115
    +
    Structure modeling a pointer and stride into a tensor.
    Definition: tensor_ref.h:42
    +
    TensorRef< T, 4 > Base
    Reference and stride.
    Definition: tensor_view.h:45
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorView is bound to some memory.
    Definition: tensor_view.h:88
    +
    CUTLASS_HOST_DEVICE bool good() const
    Returns true if the TensorRef may be safely accessed.
    Definition: tensor_ref.h:95
    +
    CUTLASS_HOST_DEVICE Offset_t offset(Coord_t const &coord) const
    Returns the index of an element.
    Definition: tensor_view.h:139
    +
    CUTLASS_HOST_DEVICE T * data() const
    Returns a pointer to data.
    Definition: tensor_view.h:92
    + +
    T & operator[](Coord< Rank > const &coord) const
    Element-wise accessor.
    Definition: tensor_view.h:157
    +
    Base TensorRef_t
    Reference and stride.
    Definition: tensor_view.h:48
    +
    CUTLASS_HOST_DEVICE int & at()
    Gets the index of a given Coord element.
    Definition: coord.h:185
    +
    CUTLASS_HOST_DEVICE T & at(Coord_t const &coord) const
    Element-wise accessor.
    Definition: tensor_view.h:154
    +
    CUTLASS_HOST_DEVICE Coord_t const & size() const
    Accesses the size.
    Definition: tensor_view.h:115
    +
    CUTLASS_HOST_DEVICE Coord_t const & stride() const
    Accesses the stride.
    Definition: tensor_view.h:123
    +
    CUTLASS_HOST_DEVICE TensorRef_t & ref()
    Accesses the tensor reference pointing to data.
    Definition: tensor_view.h:103
    +
    CUTLASS_HOST_DEVICE Coord< Rank > const & stride() const
    Returns the stride of the tensor.
    Definition: tensor_ref.h:103
    +
    CUTLASS_HOST_DEVICE TensorView & operator=(TensorView const &_tensor)
    Assigns the TensorView.
    Definition: tensor_view.h:131
    +
    Basic include for CUTLASS macros.
    +
    CUTLASS_HOST_DEVICE TensorView(TensorRef_t const &_ref, Coord_t const &_size)
    Constructs a TensorView from a TensorRef and size.
    Definition: tensor_view.h:84
    +
    TensorRef< T const, 4 > ConstTensorRef_t
    Reference to constant type.
    Definition: tensor_view.h:51
    +
    CUTLASS_HOST_DEVICE TensorView< T > subview(Coord_t const &location, Coord_t size) const
    Returns a TensorView given location and size quantities.
    Definition: tensor_view.h:165
    +
    + + + + diff --git a/docs/generated-html/thread__multiply__add_8h.html b/docs/generated-html/thread__multiply__add_8h.html new file mode 100644 index 0000000000..ab673590a5 --- /dev/null +++ b/docs/generated-html/thread__multiply__add_8h.html @@ -0,0 +1,110 @@ + + + + + + + +Cutlass: thread_multiply_add.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    thread_multiply_add.h File Reference
    +
    +
    + +

    Template implementing matrix multiply-add operations on fragments. +More...

    +
    #include <cutlass/fragment.h>
    +
    +

    Go to the source code of this file.

    + + + + + +

    +Classes

    struct  cutlass::gemm::ThreadMultiplyAdd< AccumulatorsPerThread_, ThreadsPerWarp_, ScalarA_, ScalarB_, ScalarC_ >
     Template performing matrix multiply-add operation within a thread. More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::gemm
     
    +
    + + + + diff --git a/docs/generated-html/thread__multiply__add_8h_source.html b/docs/generated-html/thread__multiply__add_8h_source.html new file mode 100644 index 0000000000..45c64172b4 --- /dev/null +++ b/docs/generated-html/thread__multiply__add_8h_source.html @@ -0,0 +1,105 @@ + + + + + + + +Cutlass: thread_multiply_add.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    thread_multiply_add.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/fragment.h>
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    38 template <typename AccumulatorsPerThread_,
    39  typename ThreadsPerWarp_,
    40  typename ScalarA_,
    41  typename ScalarB_,
    42  typename ScalarC_>
    47  typedef AccumulatorsPerThread_ AccumulatorsPerThread;
    49  typedef ThreadsPerWarp_ ThreadsPerWarp;
    53  typedef ScalarA_ ScalarA;
    57  typedef ScalarB_ ScalarB;
    61  typedef ScalarC_ ScalarC;
    64 
    66  CUTLASS_DEVICE ThreadMultiplyAdd() {}
    67 
    69  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    70  FragmentB const& b,
    71  Accumulators const& c,
    72  Accumulators& d) {
    73  for (int j = 0; j < AccumulatorsPerThread::kH; ++j) {
    74  for (int i = 0; i < AccumulatorsPerThread::kW; ++i) {
    75  d[j * AccumulatorsPerThread::kW + i] = a[i] * b[j] + c[j * AccumulatorsPerThread::kW + i];
    76  }
    77  }
    78  }
    79 };
    80 
    82 
    83 } // namespace gemm
    84 } // namespace cutlass
    Definition: convert.h:33
    +
    ThreadsPerWarp_ ThreadsPerWarp
    The number of threads per warp.
    Definition: thread_multiply_add.h:49
    +
    Shape< A_::kD *B_::kD, A_::kH *B_::kH, A_::kW *B_::kW, A_::kC *B_::kC > Shape
    Definition: shape.h:119
    +
    A template defining Fragment Concept.
    Definition: fragment.h:99
    +
    Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
    The fragment for A.
    Definition: thread_multiply_add.h:55
    +
    CUTLASS_DEVICE void multiply_add(FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
    Multiply : d = a*b + c.
    Definition: thread_multiply_add.h:69
    +
    Shape< 1, 1, 1, 1 > InstructionShape
    The shape of the instruction.
    Definition: thread_multiply_add.h:45
    +
    ScalarC_ ScalarC
    The type for C and D.
    Definition: thread_multiply_add.h:61
    +
    CUTLASS_DEVICE ThreadMultiplyAdd()
    Ctor.
    Definition: thread_multiply_add.h:66
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    AccumulatorsPerThread_ AccumulatorsPerThread
    The number of accumulators per thread.
    Definition: thread_multiply_add.h:47
    +
    ScalarB_ ScalarB
    The type for B.
    Definition: thread_multiply_add.h:57
    +
    Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > Accumulators
    The accumulators.
    Definition: thread_multiply_add.h:63
    +
    Template performing matrix multiply-add operation within a thread.
    Definition: thread_multiply_add.h:43
    +
    ScalarA_ ScalarA
    The type for A.
    Definition: thread_multiply_add.h:53
    +
    Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
    The fragment for B.
    Definition: thread_multiply_add.h:59
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    ShapeMul< AccumulatorsPerThread, ThreadsPerWarp >::Shape AccumulatorsPerWarp
    The number of accumulators per warp.
    Definition: thread_multiply_add.h:51
    +
    + + + + diff --git a/docs/generated-html/tile_8h.html b/docs/generated-html/tile_8h.html new file mode 100644 index 0000000000..76aeb22d4a --- /dev/null +++ b/docs/generated-html/tile_8h.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile.h File Reference
    +
    +
    + +

    Defines a type for restructuring a tile. +More...

    +
    #include <cutlass/shape.h>
    +
    +

    Go to the source code of this file.

    + + + + + + +

    +Classes

    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, bool >
     
    struct  cutlass::ReshapeTile< Tile_, kAccessSize_, true >
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile_8h_source.html b/docs/generated-html/tile_8h_source.html new file mode 100644 index 0000000000..33597e6a7c --- /dev/null +++ b/docs/generated-html/tile_8h_source.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/shape.h>
    31 
    32 namespace cutlass {
    33 
    35 
    36 // The following functor reshapes a tile of data. The goal is to have at least kAccessSize in
    37 // the inner-most dimension. If the user respects that constraint, there is nothing to be done. If
    38 // that's not the case, this functor will correct that and "extract" the right number of elements
    39 // from the next dimension.
    40 
    41 template <typename Tile_, int kAccessSize_, bool = (Tile_::kC < kAccessSize_)>
    42 struct ReshapeTile {
    43  typedef Tile_ Tile;
    44 };
    45 
    46 template <typename Tile_, int kAccessSize_>
    48  // Make sure the W dimension of the tile is large enough.
    49  static_assert(Tile_::kW >= kAccessSize_, "The W dimension is too small");
    50  // Make sure the dimension can be divided by the number of scalars.
    51  static_assert(Tile_::kW % kAccessSize_ == 0, "Not supported");
    52  // Collapse the W dimension.
    53  typedef Shape<Tile_::kD, Tile_::kH, Tile_::kW / kAccessSize_, kAccessSize_> Tile;
    54 };
    55 
    57 
    58 } // namespace cutlass
    Definition: convert.h:34
    + +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:63
    +
    Shape< Tile_::kD, Tile_::kH, Tile_::kW/kAccessSize_, kAccessSize_ > Tile
    Definition: tile.h:49
    +
    #define static_assert(__e, __m)
    Definition: nv_std.h:167
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    + + + + diff --git a/docs/generated-html/tile__iterator_8h.html b/docs/generated-html/tile__iterator_8h.html new file mode 100644 index 0000000000..2b778c1905 --- /dev/null +++ b/docs/generated-html/tile__iterator_8h.html @@ -0,0 +1,135 @@ + + + + + + + +Cutlass: tile_iterator.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile_iterator.h File Reference
    +
    +
    + +

    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::IteratorAdvance
     Specifies dimension in which post-increment accesses advance. More...
     
    struct  cutlass::IteratorFragment
     Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix. More...
     
    struct  cutlass::TileTraits< Tile_, Delta_, Iterations_, ThreadOffset_ >
     A template defining Tile Traits Concept. More...
     
    struct  cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     Iterator for accessing a stripmined tile in memory. More...
     
    struct  cutlass::TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters to the iterator. More...
     
    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     An iterator implementing Tile Load Iterator Concept for loading a tile from memory. More...
     
    struct  cutlass::TileLoadIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters. More...
     
    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >
     An iterator implementing Tile Store Iterator Concept for storing a tile to memory. More...
     
    struct  cutlass::TileStoreIterator< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ >::Params
     Parameters. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile__iterator_8h_source.html b/docs/generated-html/tile__iterator_8h_source.html new file mode 100644 index 0000000000..69ef0b1752 --- /dev/null +++ b/docs/generated-html/tile__iterator_8h_source.html @@ -0,0 +1,246 @@ + + + + + + + +Cutlass: tile_iterator.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile_iterator.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/fragment.h>
    32 #include <cutlass/load_store.h>
    34 #include <cutlass/vector.h>
    35 
    36 namespace cutlass {
    37 
    39 
    58 
    62  enum Kind { kD, kH, kW };
    63 };
    64 
    68 };
    69 
    71 
    76 template <typename Tile_, typename Delta_, typename Iterations_, typename ThreadOffset_>
    77 struct TileTraits {
    79  typedef Tile_ Tile;
    80 
    82  typedef Delta_ Delta;
    83 
    85  typedef Iterations_ Iterations;
    86 
    88  typedef ThreadOffset_ ThreadOffset;
    89 };
    90 
    92 
    94 template <typename Traits_,
    95  typename Scalar_,
    98  typename Index_ = int,
    99  typename FragmentElement_ = Scalar_,
    101  typename Skew_ = Shape<0, 0, 0, 0> >
    104  typedef Traits_ Traits;
    105 
    107  typedef Scalar_ Scalar;
    108 
    110  typedef FragmentElement_ FragmentElement;
    111 
    113  static IteratorAdvance::Kind const kAdvance = Advance_;
    114 
    116  static IteratorFragment::Kind const kIteratorFragment = IteratorFragment_;
    117 
    120 
    122  typedef Index_ Index;
    123 
    125  typedef Skew_ Skew;
    126 
    128  typedef typename Traits::Tile Tile;
    129 
    131  typedef typename Traits::Delta Delta;
    132 
    134  typedef typename Traits::ImmediateOffsetStrides ImmediateOffsetStrides;
    135 
    137  typedef typename Traits::Iterations Iterations;
    138 
    140  typedef typename Traits::ThreadOffset ThreadOffset;
    141 
    143  static int const kAccessSize = Tile::kC;
    144 
    147 
    149  static int const kFragmentSize =
    161 
    164 
    165  //
    166  // Params struct
    167  //
    168 
    170  struct Params {
    174 
    178 
    180 
    183  int initialize(Index _stride_d,
    184  Index _stride_h,
    185  Index _stride_w,
    186  Index _inc_d,
    187  Index _inc_h,
    188  Index _inc_w,
    189  Index _inc_advance) {
    190  stride_d = _stride_d;
    191  stride_h = _stride_h;
    192  stride_w = _stride_w;
    193 
    194  inc_d = _inc_d;
    195  inc_h = _inc_h;
    196  inc_w = _inc_w;
    197  inc_advance = _inc_advance;
    198 
    199  return 0;
    200  }
    201 
    203  int initialize(Index _stride_d, Index _stride_h, Index _stride_w) {
    204  stride_d = _stride_d;
    205  stride_h = _stride_h;
    206  stride_w = _stride_w;
    207 
    208  inc_w = stride_w * Delta::kW;
    209  inc_h = stride_h * Delta::kH - stride_w * Delta::kW * (Iterations::kW - 1);
    210 
    211  if (kAdvance == IteratorAdvance::kH) {
    212  // Advance in the H dimension.
    213  inc_d = 0;
    214  } else if (kAdvance == IteratorAdvance::kW) {
    215  // Advance in the W dimension.
    216  inc_d = stride_w * Tile::kW - stride_h * Tile::kH;
    217  } else {
    218  // Advance in the D dimension.
    219  inc_d = stride_d;
    220  }
    221 
    222  inc_advance = 0;
    223 
    224  return 0;
    225  }
    226 
    228  stride_d = 0;
    229  stride_h = 0;
    230  stride_w = 1;
    231 
    232  inc_d = inc_h = inc_w = inc_advance = 0;
    233 
    234  return 0;
    235  }
    236  };
    237 
    239  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const { return true; }
    240 
    241  //
    242  // Static function members
    243  //
    244 
    246  template <typename PredicateIterator>
    247  CUTLASS_DEVICE static void initialize_predicates(PredicateIterator predicate_it,
    248  Coord<3> const &bounds,
    249  Coord<3> const &offset = make_Coord(0, 0, 0)) {
    250  for (int d = 0; d < Iterations::kD; ++d) {
    251  bool enable_d = (d * Delta::kD + offset[0] < bounds[0]);
    252  for (int h = 0; h < Iterations::kH; ++h) {
    253  bool enable_h = (h * Delta::kH + offset[1] < bounds[1]);
    254  for (int w = 0; w < Iterations::kW; ++w) {
    255  bool enable_w = (w * Tile::kC * Delta::kW + offset[2] < bounds[2]);
    256  predicate_it.set(d, h, w, 0, enable_d && enable_h && enable_w);
    257  }
    258  }
    259  }
    260  }
    261 };
    262 
    264 
    288 
    294 template <typename Traits_,
    295  typename Scalar_,
    298  typename Index_ = int,
    299  typename FragmentElement_ = Scalar_,
    301  typename Skew_ = Shape<0, 0, 0, 0> >
    302 struct TileLoadIterator : public TileIteratorBase<Traits_,
    303  Scalar_,
    304  Advance_,
    305  MemorySpace,
    306  Index_,
    307  FragmentElement_,
    308  IteratorFragment_,
    309  Skew_> {
    311  typedef TileIteratorBase<Traits_,
    312  Scalar_,
    313  Advance_,
    314  MemorySpace,
    315  Index_,
    316  FragmentElement_,
    317  IteratorFragment_,
    318  Skew_>
    320 
    322  typedef typename Base::Traits Traits;
    323 
    325  typedef typename Base::Scalar Scalar;
    326 
    329 
    332 
    335 
    338 
    340  typedef typename Base::Index Index;
    341 
    343  typedef typename Base::Skew Skew;
    344 
    346  typedef typename Base::Tile Tile;
    347 
    349  typedef typename Base::Delta Delta;
    350 
    352  typedef typename Base::Iterations Iterations;
    353 
    356 
    359 
    361  typedef typename Base::AccessType AccessType;
    362 
    364  typedef typename Base::Fragment Fragment;
    365 
    368 
    371 
    374 
    376  typedef typename Base::Storage SharedStorage;
    377 
    379  typedef typename Base::Params BaseParams;
    380 
    382  enum { kRequiresLoadFence = Tile::kD == 1 };
    383 
    385  typedef Scalar const *Pointer;
    386 
    388  struct Params : public BaseParams {
    390  Scalar const *pointer;
    391 
    394  int initialize(SharedStorage const &storage) {
    395  pointer = &storage[0];
    396  return 0;
    397  }
    398 
    403  pointer = ptr;
    404  return 0;
    405  }
    406 
    409  int initialize(Scalar const *ptr,
    410  Index _stride_d,
    411  Index _stride_h,
    412  Index _stride_w,
    413  Index _inc_d,
    414  Index _inc_h,
    415  Index _inc_w,
    416  Index _inc_advance) {
    417  pointer = ptr;
    419  _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance);
    420  return 0;
    421  }
    422 
    423  // Initializes params to default values
    426  };
    427 
    428  //
    429  // Data members
    430  //
    431 
    433  Params params;
    434 
    437 
    439  int stage;
    440 
    441  //
    442  // Static member functions
    443  //
    444 
    446  template <typename PredicateIterator>
    447  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
    448  Coord<3> const &bounds,
    449  Coord<3> const &block_offset = make_Coord(0,
    450  0,
    451  0)) {
    453  predicate_it,
    454  bounds,
    455  block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC));
    456  }
    457 
    458  //
    459  // Methods
    460  //
    461 
    465 
    468  TileLoadIterator(Params const &_params,
    469  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    470  ThreadOffset thread_offset_func = ThreadOffset())
    471  : params(_params), stage(0) {
    472  thread_offset = thread_offset_func();
    473 
    474  Index block_offset_h = 0;
    475  Index block_offset_w = 0;
    476  if (kAdvance == IteratorAdvance::kH) {
    477  block_offset_h = block_offset[1];
    478  block_offset_w = block_offset[2];
    479  } else {
    480  block_offset_h = block_offset[2];
    481  block_offset_w = block_offset[1];
    482  }
    483 
    484  params.pointer += block_offset[0] * params.stride_d +
    485  (block_offset_h + thread_offset[1]) * params.stride_h +
    486  (block_offset_w + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w;
    487  }
    488 
    491  TileLoadIterator(Params const &,
    492  SharedStorage &shared_storage,
    493  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    494  ThreadOffset thread_offset_func = ThreadOffset())
    495  : stage(0) {
    496  int const offset = thread_offset_func()[2];
    497  params.pointer = &shared_storage[offset];
    498  }
    499 
    502  Scalar const *data() const { return params.pointer; }
    503 
    506 
    509 
    512 
    515 
    517  CUTLASS_DEVICE void inc_stage() {
    518  if (Tile::kD > 1) {
    519  int const kStageSize = Tile::kH * Tile::kW * Tile::kC;
    520  if (stage == Tile::kD - 1) {
    521  params.pointer -= (Tile::kD - 1) * kStageSize;
    522  stage = 0;
    523  } else {
    524  params.pointer += kStageSize;
    525  stage = stage + 1;
    526  }
    527  }
    528  }
    529 
    530  public:
    532  template <typename Fragment, typename PredicateIterator>
    533  CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it) {
    534  FragmentIterator frag_iterator(fragment);
    535 
    536  for (int d = 0; d < Iterations::kD; ++d) {
    537  for (int h = 0; h < Iterations::kH; ++h) {
    538  for (int w = 0; w < Iterations::kW; ++w, ++pred_it) {
    539  if (*pred_it) {
    541  reinterpret_cast<AccessType &>(frag_iterator.at(d, h, w, 0)), data(), 0);
    542  }
    543 
    544  if (w < Iterations::kW - 1) {
    545  inc_w();
    546  }
    547  }
    548  if (h < Iterations::kH - 1) {
    549  inc_h();
    550  }
    551  }
    552  if (d < Iterations::kD - 1) {
    553  inc_d();
    554  }
    555  }
    556  inc_advance();
    557  }
    558 
    560  template <typename Fragment>
    562  typename PredicateVector::TrivialIterator pred_it;
    563  load_post_increment(fragment, pred_it);
    564  }
    565 
    567  template <typename Fragment, typename PredicateIterator>
    568  CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const {
    569  TileLoadIterator _load_it(*this);
    570  _load_it.load_post_increment(fragment, pred_it);
    571  }
    572 
    574  template <typename Fragment>
    575  CUTLASS_HOST_DEVICE void load(Fragment &fragment) const {
    576  typename PredicateVector::TrivialIterator pred_it;
    577  load(fragment, pred_it);
    578  }
    579 };
    580 
    582 
    606 
    612 template <typename Traits_,
    613  typename Scalar_,
    616  typename Index_ = int,
    617  typename FragmentElement_ = Scalar_,
    619  typename Skew_ = Shape<0, 0, 0, 0> >
    620 struct TileStoreIterator : public TileIteratorBase<Traits_,
    621  Scalar_,
    622  Advance_,
    623  MemorySpace,
    624  Index_,
    625  FragmentElement_,
    626  IteratorFragment_,
    627  Skew_> {
    629  typedef TileIteratorBase<Traits_,
    630  Scalar_,
    631  Advance_,
    632  MemorySpace,
    633  Index_,
    634  FragmentElement_,
    635  IteratorFragment_,
    636  Skew_>
    638 
    640  typedef typename Base::Traits Traits;
    641 
    643  typedef typename Base::Scalar Scalar;
    644 
    647 
    650 
    653 
    656 
    658  typedef typename Base::Index Index;
    659 
    661  typedef typename Base::Skew Skew;
    662 
    664  typedef typename Base::Tile Tile;
    665 
    667  typedef typename Base::Delta Delta;
    668 
    670  typedef typename Base::Iterations Iterations;
    671 
    674 
    677 
    679  typedef typename Base::AccessType AccessType;
    680 
    682  typedef typename Base::Fragment Fragment;
    683 
    686 
    689 
    692 
    694  typedef typename Base::Storage SharedStorage;
    695 
    697  typedef typename Base::Params BaseParams;
    698 
    700  struct Params : public BaseParams {
    703 
    706  int initialize(SharedStorage &storage) {
    707  pointer = &storage[0];
    708  return 0;
    709  }
    710 
    715  pointer = ptr;
    716  return 0;
    717  }
    718 
    721  int initialize(Scalar *ptr,
    722  Index _stride_d,
    723  Index _stride_h,
    724  Index _stride_w,
    725  Index _inc_d,
    726  Index _inc_h,
    727  Index _inc_w,
    728  Index _inc_advance) {
    729  pointer = ptr;
    731  _stride_d, _stride_h, _stride_w, _inc_d, _inc_h, _inc_w, _inc_advance);
    732  return 0;
    733  }
    734 
    738  };
    739 
    740  //
    741  // Data members
    742  //
    743 
    746 
    749 
    751  int stage;
    752 
    753  //
    754  // Static member functions
    755  //
    756 
    758  template <typename PredicateIterator>
    759  CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it,
    760  Coord<3> const &bounds,
    761  Coord<3> const &block_offset = make_Coord(0,
    762  0,
    763  0)) {
    765  predicate_it,
    766  bounds,
    767  block_offset + make_Coord(0, thread_offset[1], thread_offset[2] * Tile::kC));
    768  }
    769 
    770  //
    771  // Methods
    772  //
    773 
    777 
    780  TileStoreIterator(Params const &_params,
    781  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    782  ThreadOffset thread_offset_func = ThreadOffset())
    783  : params(_params), stage(0) {
    784  thread_offset = thread_offset_func();
    785 
    786  params.pointer += block_offset[0] * params.stride_d +
    787  (block_offset[1] + thread_offset[1]) * params.stride_h +
    788  (block_offset[2] + thread_offset[2] * Tile::kC) / Tile::kC * params.stride_w;
    789  }
    790 
    794  SharedStorage &shared_storage,
    795  Coord<3> const &block_offset = make_Coord(0, 0, 0),
    796  ThreadOffset thread_offset_func = ThreadOffset())
    797  : stage(0) {
    798  int const offset = thread_offset_func()[2];
    799  params.pointer = &shared_storage[offset];
    800  }
    801 
    804  Scalar *data() const { return params.pointer; }
    805 
    808 
    811 
    814 
    817 
    819  CUTLASS_DEVICE void inc_stage() {
    820  if (Tile::kD > 1) {
    821  int const kStageSize = Tile::kH * Tile::kW * Tile::kC;
    822  if (stage == Tile::kD - 1) {
    823  params.pointer -= (Tile::kD - 1) * kStageSize;
    824  stage = 0;
    825  } else {
    826  params.pointer += kStageSize;
    827  stage = stage + 1;
    828  }
    829  }
    830  }
    831 
    832  public:
    834  template <typename Fragment, typename PredicateIterator>
    835  CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it) {
    836  FragmentIterator frag_iterator(fragment);
    837 
    838  for (int d = 0; d < Iterations::kD; ++d) {
    839  for (int h = 0; h < Iterations::kH; ++h) {
    840  for (int w = 0; w < Iterations::kW; ++w, ++pred_it) {
    841  if (*pred_it) {
    843  reinterpret_cast<AccessType &>(frag_iterator.at(d, h, w, 0)), data(), 0);
    844  }
    845  if (w < Iterations::kW - 1) {
    846  inc_w();
    847  }
    848  }
    849  if (h < Iterations::kH - 1) {
    850  inc_h();
    851  }
    852  }
    853  if (d < Iterations::kD - 1) {
    854  inc_d();
    855  }
    856  }
    857  inc_advance();
    858  }
    859 
    861  template <typename Fragment>
    863  typename PredicateVector::TrivialIterator pred_it;
    864  store_post_increment(fragment, pred_it);
    865  }
    866 
    868  template <typename Fragment, typename PredicateIterator>
    869  CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const {
    870  TileStoreIterator _store_it(*this);
    871  _store_it.store_post_increment(fragment, pred_it);
    872  }
    873 
    875  template <typename Fragment>
    876  CUTLASS_HOST_DEVICE void store(Fragment &fragment) const {
    877  typename PredicateVector::TrivialIterator pred_it;
    878  store(fragment, pred_it);
    879  }
    880 };
    881 }
    static int const kFragmentSize
    The size of storage needed per fragment.
    Definition: tile_iterator.h:149
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies type of iterator fragment storage (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:334
    +
    CUTLASS_HOST_DEVICE void inc_advance()
    Increment in the next dimension.
    Definition: tile_iterator.h:816
    +
    FragmentConstIterator< Fragment, Iterations, AccessType > FragmentConstIterator
    The fragment const iterator.
    Definition: tile_iterator.h:158
    +
    TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
    Base class.
    Definition: tile_iterator.h:637
    +
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:682
    +
    Base::FragmentIterator FragmentIterator
    Fragment iterator definition.
    Definition: tile_iterator.h:367
    +
    Definition: convert.h:33
    +
    Base::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:346
    +
    CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment, PredicateIterator pred_it)
    Loads a fragment and advances the iterator to the next tile.
    Definition: tile_iterator.h:533
    +
    Base::ThreadOffset ThreadOffset
    ThreadOffset functor.
    Definition: tile_iterator.h:355
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies in which dimension post-increment accesses advance.
    Definition: tile_iterator.h:649
    +
    FragmentIterator::FragmentShape FragmentShape
    The shape of the fragment.
    Definition: tile_iterator.h:160
    +
    Traits::ThreadOffset ThreadOffset
    Thread offset.
    Definition: tile_iterator.h:140
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies type of iterator fragment storage (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:652
    +
    Skew_ Skew
    Skew quantity.
    Definition: tile_iterator.h:125
    +
    Base::FragmentShape FragmentShape
    Fragment type.
    Definition: tile_iterator.h:676
    +
    CUTLASS_HOST_DEVICE int initialize(SharedStorage &storage)
    Initialize params to access storage object.
    Definition: tile_iterator.h:706
    +
    Enum to specify which memory space data resides in.
    Definition: load_store.h:39
    +
    Base::Skew Skew
    Skew quantity.
    Definition: tile_iterator.h:343
    +
    Kind
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE int initialize()
    Definition: tile_iterator.h:227
    +
    Base::Skew Skew
    Skew quantity.
    Definition: tile_iterator.h:661
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Base::Storage SharedStorage
    Storage object which may be stored to.
    Definition: tile_iterator.h:694
    +
    A template defining Tile Traits Concept.
    Definition: tile_iterator.h:77
    +
    CUTLASS_HOST_DEVICE Scalar const * data() const
    Returns the current pointer.
    Definition: tile_iterator.h:502
    +
    TileIteratorBase< Traits_, Scalar_, Advance_, MemorySpace, Index_, FragmentElement_, IteratorFragment_, Skew_ > Base
    Base class.
    Definition: tile_iterator.h:319
    +
    CUTLASS_HOST_DEVICE TileLoadIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile load iterator.
    Definition: tile_iterator.h:491
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index stride_d, Index stride_h, Index stride_w)
    Initializes params to access a raw pointer.
    Definition: tile_iterator.h:401
    +
    Base::Params BaseParams
    IteratorBase parameters.
    Definition: tile_iterator.h:379
    +
    Params params
    Parameters structure.
    Definition: tile_iterator.h:745
    +
    static CUTLASS_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
    The load function.
    Definition: load_store.h:59
    +
    CUTLASS_HOST_DEVICE int initialize(SharedStorage const &storage)
    Initialize params to access storage object.
    Definition: tile_iterator.h:394
    +
    Definition: tile_iterator.h:382
    +
    Base::Scalar Scalar
    Scalar element.
    Definition: tile_iterator.h:325
    +
    Base::AccessType AccessType
    Memory access type.
    Definition: tile_iterator.h:361
    +
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE void store(Fragment &fragment, PredicateIterator pred_it) const
    Stores a fragment without advancing the iterator.
    Definition: tile_iterator.h:869
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies in which dimension post-increment accesses advance.
    Definition: tile_iterator.h:331
    +
    CUTLASS_HOST_DEVICE void load_post_increment(Fragment &fragment)
    Loads a fragment and advances the iterator to the next tile.
    Definition: tile_iterator.h:561
    +
    CUTLASS_HOST_DEVICE TileLoadIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile load iterator.
    Definition: tile_iterator.h:468
    +
    CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
    Is the iterator valid?
    Definition: tile_iterator.h:239
    +
    Iterations_ Iterations
    Number of accesses performed.
    Definition: tile_iterator.h:85
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar *ptr, Index stride_d, Index stride_h, Index stride_w)
    Initializes params to access a raw pointer.
    Definition: tile_iterator.h:713
    +
    Params params
    Parameters structure.
    Definition: tile_iterator.h:433
    +
    Iterator that always returns true.
    Definition: predicate_vector.h:308
    +
    Base::Scalar Scalar
    Scalar element.
    Definition: tile_iterator.h:643
    +
    Base::Traits Traits
    concept TileTraits
    Definition: tile_iterator.h:640
    +
    Kind
    Definition: load_store.h:40
    +
    Index stride_h
    Definition: tile_iterator.h:172
    +
    CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment)
    Stores a fragment and advances to the next tile.
    Definition: tile_iterator.h:862
    +
    Fragment< Scalar, ShapeCount< Tile >::kCount, kFragmentSize > Storage
    The storage.
    Definition: tile_iterator.h:152
    +
    CUTLASS_HOST_DEVICE int initialize()
    Definition: tile_iterator.h:425
    +
    CUTLASS_HOST_DEVICE void inc_d()
    Increment in the D dimension.
    Definition: tile_iterator.h:807
    +
    Base::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:352
    +
    CUTLASS_HOST_DEVICE int initialize()
    Initializes params to default values.
    Definition: tile_iterator.h:737
    +
    Base::FragmentConstIterator FragmentConstIterator
    Fragment const iterator definition.
    Definition: tile_iterator.h:370
    +
    Index_ Index
    Index type.
    Definition: tile_iterator.h:122
    +
    static CUTLASS_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
    The store function.
    Definition: load_store.h:136
    +
    Index inc_h
    Definition: tile_iterator.h:176
    +
    Defines container classes and iterators for managing a statically sized vector of boolean predicates...
    +
    Base::Storage SharedStorage
    Storage object that may be loaded from.
    Definition: tile_iterator.h:376
    +
    Parameters.
    Definition: tile_iterator.h:700
    +
    CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:759
    +
    An iterator implementing Tile Load Iterator Concept for loading a tile from memory.
    Definition: tile_iterator.h:302
    +
    Base::Traits Traits
    concept TileTraits
    Definition: tile_iterator.h:322
    +
    static CUTLASS_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:247
    +
    Base::Params BaseParams
    IteratorBase parameters.
    Definition: tile_iterator.h:697
    +
    Base::FragmentElement FragmentElement
    Fragment element.
    Definition: tile_iterator.h:328
    +
    Traits::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:128
    +
    FragmentIterator< Fragment, Iterations, AccessType > FragmentIterator
    The fragment iterator.
    Definition: tile_iterator.h:156
    +
    int stage
    The stage.
    Definition: tile_iterator.h:751
    +
    CUTLASS_HOST_DEVICE int initialize(Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:183
    +
    Base::AccessType AccessType
    Memory access type.
    Definition: tile_iterator.h:679
    +
    Base::FragmentElement FragmentElement
    Fragment element.
    Definition: tile_iterator.h:646
    +
    Definition: load_store.h:41
    +
    CUTLASS_DEVICE void inc_stage()
    Increment the stage.
    Definition: tile_iterator.h:517
    +
    Kind
    Definition: tile_iterator.h:67
    +
    Base::PredicateVector PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:373
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar const *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:409
    +
    CUTLASS_HOST_DEVICE int initialize(Scalar *ptr, Index _stride_d, Index _stride_h, Index _stride_w, Index _inc_d, Index _inc_h, Index _inc_w, Index _inc_advance)
    Initializes params.
    Definition: tile_iterator.h:721
    +
    FragmentElement_ FragmentElement
    Fragment element.
    Definition: tile_iterator.h:110
    +
    Base::Index Index
    Index type.
    Definition: tile_iterator.h:658
    +
    Scalar * pointer
    Pointer to memory.
    Definition: tile_iterator.h:702
    +
    Index inc_advance
    Definition: tile_iterator.h:179
    +
    Definition: tile_iterator.h:67
    +
    ShapeMul< Iterations, Shape< 1, 1, 1, kElementsPerAccess > >::Shape FragmentShape
    The shape of the fragment.
    Definition: fragment.h:185
    +
    Index stride_w
    Definition: tile_iterator.h:173
    +
    CUTLASS_HOST_DEVICE TileLoadIterator()
    Default constructor.
    Definition: tile_iterator.h:464
    +
    Defines abstractions for efficiently loading and storing vectors to memory.
    +
    Scalar const * pointer
    Pointer to memory.
    Definition: tile_iterator.h:390
    +
    CUTLASS_HOST_DEVICE TileStoreIterator(Params const &_params, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile store iterator.
    Definition: tile_iterator.h:780
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Coord< 4 > thread_offset
    Offset of an individual lane from the start of the tile.
    Definition: tile_iterator.h:748
    +
    Traits::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:137
    +
    static int const kAccessSize
    The number of scalars accessed per load/store.
    Definition: tile_iterator.h:143
    +
    Tile_ Tile
    Shape of the tile.
    Definition: tile_iterator.h:79
    +
    Delta_ Delta
    Number of steps between accesses along each dimension.
    Definition: tile_iterator.h:82
    +
    CUTLASS_HOST_DEVICE int initialize(Index _stride_d, Index _stride_h, Index _stride_w)
    Definition: tile_iterator.h:203
    +
    Index stride_d
    Definition: tile_iterator.h:171
    +
    CUTLASS_HOST_DEVICE void inc_advance()
    Increment in the next dimension.
    Definition: tile_iterator.h:514
    +
    Definition: vector.h:61
    +
    Base::Delta Delta
    Delta.
    Definition: tile_iterator.h:349
    +
    Base::Tile Tile
    Tile shape.
    Definition: tile_iterator.h:664
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Base::FragmentShape FragmentShape
    Fragment type.
    Definition: tile_iterator.h:358
    +
    Specifies dimension in which post-increment accesses advance.
    Definition: tile_iterator.h:61
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:655
    +
    CUTLASS_HOST_DEVICE void inc_w()
    Increment in the W dimension.
    Definition: tile_iterator.h:511
    +
    Statically-sized array specifying Coords within a tensor.
    Definition: coord.h:48
    +
    Traits::ImmediateOffsetStrides ImmediateOffsetStrides
    The strides in each dimension between different loads/stores.
    Definition: tile_iterator.h:134
    +
    Base::Fragment Fragment
    Fragment definition.
    Definition: tile_iterator.h:364
    +
    Base::Iterations Iterations
    Iterations.
    Definition: tile_iterator.h:670
    +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    Iterator for accessing a stripmined tile in memory.
    Definition: tile_iterator.h:102
    +
    static IteratorFragment::Kind const kIteratorFragment
    Specifies iterator storage fragment type (Scalar or WmmaMatrix)
    Definition: tile_iterator.h:116
    +
    Base::Delta Delta
    Delta.
    Definition: tile_iterator.h:667
    +
    Definition: tile_iterator.h:62
    +
    CUTLASS_HOST_DEVICE Scalar * data() const
    Returns the current pointer.
    Definition: tile_iterator.h:804
    +
    ThreadOffset_ ThreadOffset
    Functor that returns the logical coordinate of each entity&#39;s initial offset in the tile...
    Definition: tile_iterator.h:88
    +
    Vectorize< FragmentElement, kAccessSize >::Type AccessType
    The elements loaded/stored by one instruction.
    Definition: tile_iterator.h:146
    +
    CUTLASS_HOST_DEVICE void inc_d()
    Increment in the D dimension.
    Definition: tile_iterator.h:505
    +
    CUTLASS_HOST_DEVICE void inc_h()
    Increment in the H dimension.
    Definition: tile_iterator.h:810
    +
    CUTLASS_HOST_DEVICE void store(Fragment &fragment) const
    Stores a fragment without advancing the iterator.
    Definition: tile_iterator.h:876
    +
    CUTLASS_HOST_DEVICE void inc_h()
    Increment in the H dimension.
    Definition: tile_iterator.h:508
    +
    Parameters.
    Definition: tile_iterator.h:388
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:337
    +
    Base::ThreadOffset ThreadOffset
    ThreadOffset functor.
    Definition: tile_iterator.h:673
    +
    static MemorySpace::Kind const kMemorySpace
    Source or destination memory space.
    Definition: tile_iterator.h:119
    +
    Base::FragmentIterator FragmentIterator
    Fragment iterator definition.
    Definition: tile_iterator.h:685
    +
    CUTLASS_HOST_DEVICE void load(Fragment &fragment, PredicateIterator pred_it) const
    Loads a fragment without advancing the iterator.
    Definition: tile_iterator.h:568
    +
    CUTLASS_HOST_DEVICE TileStoreIterator(Params const &, SharedStorage &shared_storage, Coord< 3 > const &block_offset=make_Coord(0, 0, 0), ThreadOffset thread_offset_func=ThreadOffset())
    Constructs a tile store iterator.
    Definition: tile_iterator.h:793
    +
    CUTLASS_HOST_DEVICE void initialize_predicates(PredicateIterator predicate_it, Coord< 3 > const &bounds, Coord< 3 > const &block_offset=make_Coord(0, 0, 0))
    Initializes a predicate vector.
    Definition: tile_iterator.h:447
    +
    Fragment< FragmentElement, ShapeCount< Iterations >::kCount *kAccessSize > Fragment
    The fragment.
    Definition: tile_iterator.h:154
    +
    CUTLASS_HOST_DEVICE void load(Fragment &fragment) const
    Loads a fragment without advancing the iterator.
    Definition: tile_iterator.h:575
    +
    Definition: tile_iterator.h:62
    +
    static IteratorAdvance::Kind const kAdvance
    Specifies dimension in which post-increment accesses advance.
    Definition: tile_iterator.h:113
    +
    Index inc_w
    Definition: tile_iterator.h:177
    +
    Coord< 4 > thread_offset
    Offset of an individual lane from the start of the tile.
    Definition: tile_iterator.h:436
    +
    Traits::Delta Delta
    Distance along each dimension.
    Definition: tile_iterator.h:131
    +
    int stage
    Stage argument enables wrapping after some number of tiles have been loaded.
    Definition: tile_iterator.h:439
    +
    Base::FragmentConstIterator FragmentConstIterator
    Fragment const iterator definition.
    Definition: tile_iterator.h:688
    +
    CUTLASS_HOST_DEVICE TileStoreIterator()
    Default constructor.
    Definition: tile_iterator.h:776
    +
    Base::PredicateVector PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:691
    +
    Scalar const * Pointer
    The pointer type.
    Definition: tile_iterator.h:385
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...
    +
    Parameters to the iterator.
    Definition: tile_iterator.h:170
    +
    Base::Index Index
    Index type.
    Definition: tile_iterator.h:340
    +
    CUTLASS_DEVICE void inc_stage()
    Increment the stage.
    Definition: tile_iterator.h:819
    +
    CUTLASS_HOST_DEVICE void store_post_increment(Fragment &fragment, PredicateIterator pred_it)
    Stores a fragment and advances to the next tile.
    Definition: tile_iterator.h:835
    +
    CUTLASS_HOST_DEVICE void inc_w()
    Increment in the W dimension.
    Definition: tile_iterator.h:813
    +
    PredicateVector< ShapeCount< Iterations >::kCount > PredicateVector
    Default predicate mask type.
    Definition: tile_iterator.h:163
    +
    Definition: tile_iterator.h:67
    +
    Scalar_ Scalar
    Scalar element.
    Definition: tile_iterator.h:107
    +
    Specifies whether iterator storage fragment consists of Scalar values or WMMA matrix.
    Definition: tile_iterator.h:66
    +
    Index inc_d
    Definition: tile_iterator.h:175
    +
    An iterator implementing Tile Store Iterator Concept for storing a tile to memory.
    Definition: tile_iterator.h:620
    +
    Traits_ Traits
    concept TileTraits
    Definition: tile_iterator.h:104
    +
    + + + + diff --git a/docs/generated-html/tile__traits__standard_8h.html b/docs/generated-html/tile__traits__standard_8h.html new file mode 100644 index 0000000000..d45ace8cad --- /dev/null +++ b/docs/generated-html/tile__traits__standard_8h.html @@ -0,0 +1,121 @@ + + + + + + + +Cutlass: tile_traits_standard.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    tile_traits_standard.h File Reference
    +
    +
    + +

    Defines tile traits for several tile partitioning arrangements of threads expected to achieve efficient streaming performance. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::TiledThreadOffset< ThreadShape >
     Basic thread offset function computed from a thread shape. More...
     
    struct  cutlass::TileTraitsStrideMajor< Tile_, Threads >
     
    struct  cutlass::TileTraitsContiguousMajor< Tile_, Threads >
     
    struct  cutlass::TileTraitsWarpRake< Tile_, Threads >
     Tiling in which warps rake across the contiguous dimension. More...
     
    struct  cutlass::TileTraitsWarpRake< Tile_, Threads >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    struct  cutlass::TileTraitsStandard< Tile_, Threads >
     Chooses 'best' shape to enable warp raking along contiguous dimension if possible. More...
     
    + + + +

    +Namespaces

     cutlass
     
    +
    + + + + diff --git a/docs/generated-html/tile__traits__standard_8h_source.html b/docs/generated-html/tile__traits__standard_8h_source.html new file mode 100644 index 0000000000..ed4a1efa90 --- /dev/null +++ b/docs/generated-html/tile__traits__standard_8h_source.html @@ -0,0 +1,132 @@ + + + + + + + +Cutlass: tile_traits_standard.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    tile_traits_standard.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/tile_iterator.h>
    32 
    33 namespace cutlass {
    34 
    36 
    38 template <typename ThreadShape>
    42  Coord<4> operator()() const {
    43  Coord<4> thread_offset;
    44 
    45  int index = threadIdx.x;
    46 
    47  thread_offset[3] = (index % ThreadShape::kC);
    48  index = (index / ThreadShape::kC);
    49 
    50  thread_offset[2] = (index % ThreadShape::kW);
    51  index = (index / ThreadShape::kW);
    52 
    53  thread_offset[1] = (index % ThreadShape::kH);
    54  index = (index / ThreadShape::kH);
    55 
    56  thread_offset[0] = index;
    57 
    58  return thread_offset;
    59  }
    60 };
    61 
    63 
    66 template <typename Tile_, int Threads>
    69  typedef Tile_ Tile;
    70 
    72  static int const kThreads = Threads;
    73 
    74  // Static assertions
    76  "Tiling undefined if elements not divisible by threads.");
    77 
    78  static_assert(Tile::kW <= kThreads,
    79  "This specialization assumes there are more threads than the contiguous dimension "
    80  "of the tile.");
    81 
    83  typedef Shape<1, kThreads / Tile::kW, Tile::kW, 1> ThreadShape;
    84 
    87 
    89  typedef Shape<1, Tile::kH / ThreadShape::kH, 1, 1> Iterations;
    90 
    93 };
    94 
    96 
    99 template <typename Tile_, int Threads>
    102  typedef Tile_ Tile;
    103 
    105  static int const kThreads = Threads;
    106 
    107  // Static assertions
    108  static_assert(Tile::kW >= kThreads,
    109  "This specialization assumes there are more threads than the contiguous dimension "
    110  "of the tile.");
    111 
    113  "Tiling undefined if elements not divisible by threads.");
    114 
    115  static_assert(!(Tile::kW % kThreads),
    116  "The contiguous size of the tile must be divisible by the number of threads.");
    117 
    120 
    123 
    125  typedef Shape<1, Tile::kH, Tile::kW / kThreads> Iterations;
    126 
    129 };
    130 
    132 
    134 template <typename Tile_, int Threads>
    137  typedef Tile_ Tile;
    138 
    140  static int const kThreads = Threads;
    141 
    143  static int const kWarpSize = 32;
    144 
    146  static int const kWarpCount = kThreads / kWarpSize;
    147 
    148  // Static assertions
    150  "Tiling undefined if elements not divisible by threads.");
    151 
    152  static_assert(!(kThreads % kWarpSize), "Number of threads must be divisible by the warp size.");
    153 
    154  static_assert(!(Tile::kW % kWarpSize), "Contiguous dimension must be divisible by the warp size");
    155 
    157  static int const kWarpsStrided = __NV_STD_MIN(kWarpCount, Tile::kH);
    158 
    161 
    164 
    167 
    169  typedef Shape<1, Tile::kH / Delta::kH, Tile::kW / ThreadShape::kW> Iterations;
    170 
    172  struct ThreadOffset {
    176  int tid = threadIdx.x;
    177  int warp = (tid / kWarpSize);
    178  int lane = (tid % kWarpSize);
    179 
    180  static int const kWarpSpanContiguous = kWarpSize * Iterations::kW;
    181 
    182  int warp_w = (warp % kWarpsContiguous);
    183  int warp_h = (warp / kWarpsContiguous);
    184 
    185  return make_Coord(0, warp_h, lane + kWarpSpanContiguous * warp_w, 0);
    186  }
    187  };
    188 };
    189 
    191 
    193 template <typename Tile_, int Threads>
    196  typedef Tile_ Tile;
    197 
    199  static int const kThreads = Threads;
    200 
    202  static int const kWarpSize = 32;
    203 
    205  static int const kWarpCount = kThreads / kWarpSize;
    206 
    207  // Static assertions
    209  "Tiling undefined if elements not divisible by threads.");
    210 
    214  typedef typename platform::conditional <
    215  Tile::kW<kWarpSize,
    217  typename platform::conditional<!(Tile::kW % kWarpSize),
    220  type Traits;
    221 
    223  typedef typename Traits::Delta Delta;
    224 
    227  typedef Delta ImmediateOffsetStrides;
    228 
    230  typedef typename Traits::Iterations Iterations;
    231 
    233  typedef typename Traits::ThreadOffset ThreadOffset;
    234 };
    235 
    237 
    238 } // namespace cutlass
    Shape< 1, Tile::kH/Delta::kH, Tile::kW/ThreadShape::kW > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:169
    +
    Definition: convert.h:33
    +
    Shape< 1, Tile::kH/ThreadShape::kH, 1, 1 > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:89
    +
    static int const kWarpCount
    Number of participating warps.
    Definition: tile_traits_standard.h:205
    +
    Definition: tile_traits_standard.h:100
    +
    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
    +
    static int const kWarpsStrided
    Warps strip-mined across strided dimension.
    Definition: tile_traits_standard.h:157
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:105
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Computes the thread offset in (H, W) based on thread ID.
    Definition: tile_traits_standard.h:172
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:72
    +
    Chooses 'best' shape to enable warp raking along contiguous dimension if possible.
    Definition: tile_traits_standard.h:194
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:137
    +
    static int const kWarpsContiguous
    Warps strip-mined across contiguous dimension.
    Definition: tile_traits_standard.h:160
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Computes the logical coordinate from thread shape.
    Definition: tile_traits_standard.h:42
    +
    Shape< 1, kWarpsStrided, kWarpSize > Delta
    The same warp rakes along the contiguous dimension.
    Definition: tile_traits_standard.h:166
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Basic thread offset function computed from a thread shape.
    Definition: tile_traits_standard.h:175
    +
    Basic thread offset function computed from a thread shape.
    Definition: tile_traits_standard.h:39
    +
    static int const kH
    The height of the cube.
    Definition: shape.h:68
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:140
    +
    Shape< 1, ThreadShape::kH, 1, 1 > Delta
    Delta along each dimension.
    Definition: tile_traits_standard.h:86
    +
    Shape< 1, kThreads/Tile::kW, Tile::kW, 1 > ThreadShape
    Shape of threads.
    Definition: tile_traits_standard.h:76
    +
    static int const kWarpSize
    Hard-coded warp size.
    Definition: tile_traits_standard.h:143
    +
    #define __NV_STD_MIN(a, b)
    Select minimum(a, b)
    Definition: platform.h:160
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:196
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:69
    +
    static int const kWarpCount
    Number of participating warps.
    Definition: tile_traits_standard.h:146
    +
    Shape< 1, kWarpsStrided, kWarpsContiguous *kWarpSize > ThreadShape
    Arrangement of threads.
    Definition: tile_traits_standard.h:163
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Definition: tile_traits_standard.h:67
    +
    std::conditional (true specialization)
    Definition: platform.h:343
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    + +
    TiledThreadOffset< ThreadShape > ThreadOffset
    Computes the initial offset.
    Definition: tile_traits_standard.h:92
    +
    Tile_ Tile
    Shape of tile.
    Definition: tile_traits_standard.h:102
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    Tiling in which warps rake across the contiguous dimension.
    Definition: tile_traits_standard.h:135
    +
    static int const kWarpSize
    Hard-coded warp size.
    Definition: tile_traits_standard.h:202
    +
    Shape< 1, 1, kThreads > Delta
    Delta between each thread's access.
    Definition: tile_traits_standard.h:122
    +
    Shape< 1, 1, kThreads > ThreadShape
    Thread shape.
    Definition: tile_traits_standard.h:110
    +
    Computes derived counts of a Layout Concept based class.
    Definition: shape.h:79
    +
    TiledThreadOffset< ThreadShape > ThreadOffset
    Computes the initial offset.
    Definition: tile_traits_standard.h:128
    +
    static int const kThreads
    Number of participating threads.
    Definition: tile_traits_standard.h:199
    +
    Shape< 1, Tile::kH, Tile::kW/kThreads > Iterations
    Number of iterations.
    Definition: tile_traits_standard.h:125
    +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector-members.html b/docs/generated-html/unioncutlass_1_1Vector-members.html new file mode 100644 index 0000000000..f581f8db55 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vector< Scalar_, kLanes_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector.html b/docs/generated-html/unioncutlass_1_1Vector.html new file mode 100644 index 0000000000..f8e027716b --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector.html @@ -0,0 +1,314 @@ + + + + + + + +Cutlass: cutlass::Vector< Scalar_, kLanes_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vector< Scalar_, kLanes_ > Union Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + + + + + + + + + + + +

    +Public Types

    enum  { kLanes = kLanes_ + }
     The number of elements in the vector. More...
     
    enum  { kVectorSize = kLanes * (int)sizeof(Scalar) + }
     The size of the vector. More...
     
    enum  
     The number of registers needed to store the vector. More...
     
    typedef Scalar_ Scalar
     The scalar type. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Scalar const & operator[] (uint32_t i) const
     Accessor to the ith lane. More...
     
    CUTLASS_DEVICE Scalar & operator[] (uint32_t i)
     Accessor to the ith lane. More...
     
    + + + + + + + + + + +

    +Public Attributes

    AlignedStruct< kVectorSize > aligned_
     The aligned storage to make sure we have good alignment. More...
     
    Scalar scalars [kLanes]
     The associated array of scalars. More...
     
    uint32_t registers [kRegisters]
     The data in registers. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    typedef Scalar_ cutlass::Vector< Scalar_, kLanes_ >::Scalar
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kLanes 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kVectorSize 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    anonymous enum
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar const& cutlass::Vector< Scalar_, kLanes_ >::operator[] (uint32_t i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar& cutlass::Vector< Scalar_, kLanes_ >::operator[] (uint32_t i)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ aligned_

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    AlignedStruct<kVectorSize> cutlass::Vector< Scalar_, kLanes_ >::aligned_
    +
    + +
    +
    + +

    ◆ registers

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    uint32_t cutlass::Vector< Scalar_, kLanes_ >::registers[kRegisters]
    +
    + +
    +
    + +

    ◆ scalars

    + +
    +
    +
    +template<typename Scalar_, int kLanes_>
    + + + + +
    Scalar cutlass::Vector< Scalar_, kLanes_ >::scalars[kLanes]
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html new file mode 100644 index 0000000000..26516dab60 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4-members.html @@ -0,0 +1,98 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::Vector< half, kLanes_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html new file mode 100644 index 0000000000..80d3d9ee93 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1Vector_3_01half_00_01kLanes___01_4.html @@ -0,0 +1,314 @@ + + + + + + + +Cutlass: cutlass::Vector< half, kLanes_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::Vector< half, kLanes_ > Union Template Reference
    +
    +
    + +

    #include <vector.h>

    + + + + + + + + + + + + + + +

    +Public Types

    enum  { kLanes = kLanes_ + }
     The number of elements in the vector. More...
     
    enum  { kVectorSize = kLanes * (int)sizeof(Scalar) + }
     The size of the vector. More...
     
    enum  
     The number of registers needed to store the vector. More...
     
    typedef half Scalar
     The scalar type. More...
     
    + + + + + + + +

    +Public Member Functions

    CUTLASS_DEVICE Scalar const & operator[] (uint32_t i) const
     Accessor to the ith lane. More...
     
    CUTLASS_DEVICE Scalar & operator[] (uint32_t i)
     Accessor to the ith lane. More...
     
    + + + + + + + + + + +

    +Public Attributes

    AlignedStruct< kVectorSize > aligned_
     The aligned storage to make sure we have good alignment. More...
     
    uint16_t scalars [kLanes]
     The associated array of scalars. More...
     
    uint32_t registers [kRegisters]
     The data in registers. More...
     
    +

    Member Typedef Documentation

    + +

    ◆ Scalar

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    typedef half cutlass::Vector< half, kLanes_ >::Scalar
    +
    + +
    +
    +

    Member Enumeration Documentation

    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kLanes 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + + +
    Enumerator
    kVectorSize 
    + +
    +
    + +

    ◆ anonymous enum

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    anonymous enum
    +
    + +
    +
    +

    Member Function Documentation

    + +

    ◆ operator[]() [1/2]

    + +
    +
    +
    +template<int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar const& cutlass::Vector< half, kLanes_ >::operator[] (uint32_t i) const
    +
    +inline
    +
    + +
    +
    + +

    ◆ operator[]() [2/2]

    + +
    +
    +
    +template<int kLanes_>
    + + + + + +
    + + + + + + + + +
    CUTLASS_DEVICE Scalar& cutlass::Vector< half, kLanes_ >::operator[] (uint32_t i)
    +
    +inline
    +
    + +
    +
    +

    Member Data Documentation

    + +

    ◆ aligned_

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    AlignedStruct<kVectorSize> cutlass::Vector< half, kLanes_ >::aligned_
    +
    + +
    +
    + +

    ◆ registers

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    uint32_t cutlass::Vector< half, kLanes_ >::registers[kRegisters]
    +
    + +
    +
    + +

    ◆ scalars

    + +
    +
    +
    +template<int kLanes_>
    + + + + +
    uint16_t cutlass::Vector< half, kLanes_ >::scalars[kLanes]
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html new file mode 100644 index 0000000000..5b998e32ae --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html new file mode 100644 index 0000000000..1a79c8cfc6 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmEpilogueTraits_1_1StreamSharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage Union Reference
    +
    +
    + +

    The shared memory storage to exchange data. +

    + +

    #include <gemm_epilogue_traits.h>

    + + + + + + +

    +Public Attributes

    SharedStoreIteratorD::SharedStorage store
     
    SharedLoadIteratorD::SharedStorage load
     
    +

    Member Data Documentation

    + +

    ◆ load

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedLoadIteratorD::SharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage::load
    +
    + +
    +
    + +

    ◆ store

    + +
    +
    +
    +template<typename OutputTile_, typename Accumulators_, typename GlobalLoadIteratorC_, typename GlobalTransformerC_, typename GlobalTransformerD_, typename GlobalStoreIteratorD_, typename SharedStoreIteratorD_, typename SharedStoreTransformerD_, typename SharedLoadIteratorD_, typename Iterations_, typename Delta_, typename Functor_, typename Index_ = int>
    + + + + +
    SharedStoreIteratorD::SharedStorage cutlass::gemm::GemmEpilogueTraits< OutputTile_, Accumulators_, GlobalLoadIteratorC_, GlobalTransformerC_, GlobalTransformerD_, GlobalStoreIteratorD_, SharedStoreIteratorD_, SharedStoreTransformerD_, SharedLoadIteratorD_, Iterations_, Delta_, Functor_, Index_ >::StreamSharedStorage::store
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html new file mode 100644 index 0000000000..be28d80e5f --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html new file mode 100644 index 0000000000..c182796bbb --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1SharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage Union Reference
    +
    +
    + +

    The storage in shared memory. +

    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Attributes

    MainLoopSharedStorage main_loop
     
    Epilogue::SharedStorage epilogue
     
    +

    Member Data Documentation

    + +

    ◆ epilogue

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    Epilogue::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage::epilogue
    +
    + +
    +
    + +

    ◆ main_loop

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    + + + + +
    MainLoopSharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::SharedStorage::main_loop
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html new file mode 100644 index 0000000000..b675d5ab5c --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html new file mode 100644 index 0000000000..97b2113bae --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GemmTraits_1_1StreamSharedStorage.html @@ -0,0 +1,140 @@ + + + + + + + +Cutlass: cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Union Template Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ > Union Template Reference
    +
    +
    + +

    #include <gemm_traits.h>

    + + + + + + +

    +Public Attributes

    GlobalLoadStream_::SharedStorage global
     
    SharedLoadStream_::SharedStorage shared
     
    +

    Member Data Documentation

    + +

    ◆ global

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GlobalLoadStream_, typename SharedLoadStream_>
    + + + + +
    GlobalLoadStream_::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >::global
    +
    + +
    +
    + +

    ◆ shared

    + +
    +
    +
    +template<typename GemmConfig_, typename GlobalLoadStreamA_, typename GlobalLoadStreamB_, typename SharedLoadStreamA_, typename SharedLoadStreamB_, typename Epilogue_, typename BlockSwizzle_ = IdentityBlockSwizzle, typename Index_ = int, typename ClearAccumulators_ = ClearAccumulators<typename GemmConfig_::Accumulators::Scalar>>
    +
    +template<typename GlobalLoadStream_, typename SharedLoadStream_>
    + + + + +
    SharedLoadStream_::SharedStorage cutlass::gemm::GemmTraits< GemmConfig_, GlobalLoadStreamA_, GlobalLoadStreamB_, SharedLoadStreamA_, SharedLoadStreamB_, Epilogue_, BlockSwizzle_, Index_, ClearAccumulators_ >::StreamSharedStorage< GlobalLoadStream_, SharedLoadStream_ >::shared
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html new file mode 100644 index 0000000000..02b68012e0 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage-members.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: Member List + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Member List
    +
    + + + + + diff --git a/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html new file mode 100644 index 0000000000..77fcb12335 --- /dev/null +++ b/docs/generated-html/unioncutlass_1_1gemm_1_1GlobalLoadStreamBase_1_1SharedStorage.html @@ -0,0 +1,139 @@ + + + + + + + +Cutlass: cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Union Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage Union Reference
    +
    +
    + +

    The storage in shared memory needed by that stream. +

    + +

    #include <gemm_global_stream.h>

    + + + + + + +

    +Public Attributes

    LoadIterator::SharedStorage load_iterator
     
    SharedStoreStorage store_iterator
     
    +

    Member Data Documentation

    + +

    ◆ load_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    LoadIterator::SharedStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage::load_iterator
    +
    + +
    +
    + +

    ◆ store_iterator

    + +
    +
    +
    +template<typename LoadIterator_ , typename StoreIterator_ , typename Transformer_ >
    + + + + +
    SharedStoreStorage cutlass::gemm::GlobalLoadStreamBase< LoadIterator_, StoreIterator_, Transformer_ >::SharedStorage::store_iterator
    +
    + +
    +
    +
    The documentation for this union was generated from the following file: +
    + + + + diff --git a/docs/generated-html/vector_8h.html b/docs/generated-html/vector_8h.html new file mode 100644 index 0000000000..a3e0c090d8 --- /dev/null +++ b/docs/generated-html/vector_8h.html @@ -0,0 +1,165 @@ + + + + + + + +Cutlass: vector.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    vector.h File Reference
    +
    +
    + +

    Defines a 1D vector of elements held in the registers of each thread. +More...

    +
    #include <cuda_fp16.h>
    +#include <cutlass/util/platform.h>
    +
    +

    Go to the source code of this file.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Classes

    struct  cutlass::AlignedStruct< kAlignment_ >
     
    union  cutlass::Vector< Scalar_, kLanes_ >
     
    union  cutlass::Vector< half, kLanes_ >
     
    struct  cutlass::Vectorize< Element_, kLanes_ >
     
    struct  cutlass::Vectorize< Element_, 1 >
     
    struct  cutlass::Extent< T >
     Returns the extent of a scalar or vector. More...
     
    struct  cutlass::Extent< Vector< T, Lanes > >
     Returns the number of lanes of a vector if need be. More...
     
    struct  cutlass::Extent< Vector< T, Lanes > const >
     Returns the number of lanes of a vector if need be. More...
     
    struct  cutlass::VectorTraits< T >
     Traits describing properties of vectors and scalar-as-vectors. More...
     
    struct  cutlass::VectorTraits< Vector< T, Lanes > >
     Partial specialization for actual cutlass::Vector. More...
     
    struct  cutlass::VectorTraits< Vector< T, Lanes > const >
     Partial specialization for actual cutlass::Vector. More...
     
    + + + +

    +Namespaces

     cutlass
     
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    +Functions

    template<>
    struct cutlass::__align__ (1) AlignedStruct< 1 >
     
    template<>
    struct cutlass::__align__ (2) AlignedStruct< 2 >
     
    template<>
    struct cutlass::__align__ (4) AlignedStruct< 4 >
     
    template<>
    struct cutlass::__align__ (8) AlignedStruct< 8 >
     
    template<>
    struct cutlass::__align__ (16) AlignedStruct< 16 >
     
    template<>
    struct cutlass::__align__ (32) AlignedStruct< 32 >
     
    template<>
    struct cutlass::__align__ (64) AlignedStruct< 64 >
     
    template<typename Scalar_ >
    CUTLASS_DEVICE void cutlass::make_zero (Scalar_ &x)
     
    template<typename Scalar_ , int kLanes_>
    CUTLASS_DEVICE void cutlass::make_zero (Vector< Scalar_, kLanes_ > &vec)
     
    +
    + + + + diff --git a/docs/generated-html/vector_8h_source.html b/docs/generated-html/vector_8h_source.html new file mode 100644 index 0000000000..735823858e --- /dev/null +++ b/docs/generated-html/vector_8h_source.html @@ -0,0 +1,120 @@ + + + + + + + +Cutlass: vector.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    vector.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
    31 #include <cuda_fp16.h>
    32 #endif
    33 
    34 #include <cutlass/util/platform.h>
    35 
    36 namespace cutlass {
    37 
    39 
    40 template <size_t kAlignment_>
    41 struct AlignedStruct {};
    42 
    43 template <>
    44 struct __align__(1) AlignedStruct<1>{};
    45 template <>
    46 struct __align__(2) AlignedStruct<2>{};
    47 template <>
    48 struct __align__(4) AlignedStruct<4>{};
    49 template <>
    50 struct __align__(8) AlignedStruct<8>{};
    51 template <>
    52 struct __align__(16) AlignedStruct<16>{};
    53 template <>
    54 struct __align__(32) AlignedStruct<32>{};
    55 template <>
    56 struct __align__(64) AlignedStruct<64>{};
    57 
    59 
    60 template <typename Scalar_, int kLanes_>
    61 union Vector {
    63  typedef Scalar_ Scalar;
    64 
    66  enum { kLanes = kLanes_ };
    68  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
    70  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
    71 
    72  // Make sure that the vector type makes sense.
    73  static_assert(kVectorSize <= 16, "Vector type is too large");
    74 
    80  uint32_t registers[kRegisters];
    81 
    83  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const { return scalars[i]; }
    85  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return scalars[i]; }
    86 };
    87 
    89 
    90 #if !defined(__CUDACC_RTC__) || defined(CUTLASS_NVRTC_HAS_FP16)
    91 
    92 template <int kLanes_>
    93 union Vector<half, kLanes_> {
    95  typedef half Scalar;
    96 
    98  enum { kLanes = kLanes_ };
    100  enum { kVectorSize = kLanes * (int)sizeof(Scalar) };
    102  enum { kRegisters = kVectorSize < 4 ? 1 : kVectorSize / 4 };
    103 
    104  // Make sure that the vector type makes sense.
    105  static_assert(kVectorSize <= size_t(16), "Vector type is too large");
    106 
    110  uint16_t scalars[kLanes];
    112  uint32_t registers[kRegisters];
    113 
    115  CUTLASS_DEVICE Scalar const& operator[](uint32_t i) const {
    116  return reinterpret_cast<Scalar const&>(scalars[i]);
    117  }
    119  CUTLASS_DEVICE Scalar& operator[](uint32_t i) { return reinterpret_cast<Scalar&>(scalars[i]); }
    120 };
    121 
    122 #endif
    123 
    125 
    126 template <typename Scalar_>
    127 CUTLASS_DEVICE void make_zero(Scalar_& x) {
    128  x = Scalar_(0);
    129 }
    130 
    132 
    133 template <typename Element_, int kLanes_ = 1>
    134 struct Vectorize {
    136 };
    137 
    139 
    140 template <typename Element_>
    141 struct Vectorize<Element_, 1> {
    142  typedef Element_ Type;
    143 };
    144 
    146 
    147 template <typename Scalar_, int kLanes_>
    148 CUTLASS_DEVICE void make_zero(Vector<Scalar_, kLanes_>& vec) {
    149  for (int i = 0; i < Vector<Scalar_, kLanes_>::kRegisters; ++i) {
    150  vec.registers[i] = 0;
    151  }
    152 }
    153 
    155 //
    156 // cutlass::Extent similar to std::extent but applicable to CUTLASS types
    157 //
    158 
    160 template <typename T>
    161 struct Extent {
    162  static size_t const kValue = 1;
    163 };
    164 
    166 template <typename T, int Lanes>
    167 struct Extent<Vector<T, Lanes> > {
    168  static size_t const kValue = Lanes;
    169 };
    170 
    172 template <typename T, int Lanes>
    173 struct Extent<Vector<T, Lanes> const> {
    174  static size_t const kValue = Lanes;
    175 };
    176 
    178 
    180 template <typename T>
    181 struct VectorTraits {
    183  typedef T Scalar;
    184 
    186  static int const kLanes = 1;
    187 
    189  static bool const IsVector = false;
    190 
    193 };
    194 
    196 template <typename T, int Lanes>
    197 struct VectorTraits<Vector<T, Lanes> > {
    199  typedef T Scalar;
    200 
    202  static int const kLanes = Lanes;
    203 
    205  static bool const IsVector = true;
    206 
    209 };
    210 
    212 template <typename T, int Lanes>
    213 struct VectorTraits<Vector<T, Lanes> const> {
    215  typedef T Scalar;
    216 
    218  static int const kLanes = Lanes;
    219 
    221  static bool const IsVector = true;
    222 
    225 };
    226 
    228 
    229 } // namespace cutlass
    Element_ Type
    Definition: vector.h:142
    +
    Definition: convert.h:33
    +
    Definition: vector.h:134
    +
    CUTLASS_DEVICE void make_zero(Scalar_ &x)
    Definition: vector.h:127
    +
    Definition: vector.h:41
    +
    T Scalar
    Scalar type.
    Definition: vector.h:183
    +
    struct __align__(1) AlignedStruct< 1 >
    Definition: vector.h:44
    +
    C++ features that may be otherwise unimplemented for CUDA device functions.
    +
    Scalar_ Scalar
    The scalar type.
    Definition: vector.h:63
    +
    Definition: vector.h:66
    +
    half Scalar
    The scalar type.
    Definition: vector.h:95
    +
    uint32_t registers[kRegisters]
    The data in registers.
    Definition: vector.h:80
    +
    Vector< T, 1 > Vector
    Type that is always a vector.
    Definition: vector.h:192
    +
    CUTLASS_DEVICE Scalar & operator[](uint32_t i)
    Accessor to the ith lane.
    Definition: vector.h:119
    +
    CUTLASS_DEVICE Scalar & operator[](uint32_t i)
    Accessor to the ith lane.
    Definition: vector.h:85
    +
    Traits describing properties of vectors and scalar-as-vectors.
    Definition: vector.h:181
    +
    #define static_assert(__e, __m)
    Definition: platform.h:145
    +
    Definition: vector.h:61
    +
    static bool const IsVector
    True if the type is actually a cutlass::Vector, otherwise false.
    Definition: vector.h:189
    +
    Scalar scalars[kLanes]
    The associated array of scalars.
    Definition: vector.h:78
    +
    Vector< T, Lanes > Vector
    Type that is always a Vector.
    Definition: vector.h:224
    +
    Definition: vector.h:68
    +
    static int const kLanes
    Number of lanes of vector.
    Definition: vector.h:186
    +
    CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
    Accessor to the ith lane.
    Definition: vector.h:115
    +
    T Scalar
    Scalar type.
    Definition: vector.h:215
    +
    Vector< Element_, kLanes_ > Type
    Definition: vector.h:135
    +
    T Scalar
    Scalar type.
    Definition: vector.h:199
    +
    static size_t const kValue
    Definition: vector.h:162
    +
    AlignedStruct< kVectorSize > aligned_
    The aligned storage to make sure we have good alignment.
    Definition: vector.h:73
    +
    AlignedStruct< kVectorSize > aligned_
    The aligned storage to make sure we have good alignment.
    Definition: vector.h:105
    +
    Vector< T, Lanes > Vector
    Type that is always a Vector.
    Definition: vector.h:208
    +
    CUTLASS_DEVICE Scalar const & operator[](uint32_t i) const
    Accessor to the ith lane.
    Definition: vector.h:83
    +
    Returns the extent of a scalar or vector.
    Definition: vector.h:161
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__epilogue__traits_8h.html b/docs/generated-html/wmma__gemm__epilogue__traits_8h.html new file mode 100644 index 0000000000..31a795945f --- /dev/null +++ b/docs/generated-html/wmma__gemm__epilogue__traits_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_epilogue_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_epilogue_traits.h File Reference
    +
    +
    + +

    Defines structural properties of WMMA GEMM's epilogue phase. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html b/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html new file mode 100644 index 0000000000..92d9abc1ee --- /dev/null +++ b/docs/generated-html/wmma__gemm__epilogue__traits_8h_source.html @@ -0,0 +1,104 @@ + + + + + + + +Cutlass: wmma_gemm_epilogue_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_epilogue_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 
    33 #include <cutlass/convert.h>
    34 #include <cutlass/coord.h>
    40 #include <cutlass/reshape_tile.h>
    41 #include <cutlass/tile_iterator.h>
    42 
    43 namespace cutlass {
    44 namespace gemm {
    45 
    47 
    48 template <typename GemmConfig_, typename EpilogueFunctor_, typename Index_ = int>
    49 struct WmmaGemmEpilogueTraitsHelper {
    51  typedef typename EpilogueFunctor_::Scalar Scalar;
    53  typedef typename GemmConfig_::OutputTile OutputTile;
    54 
    56  static int const kWmmasPerH =
    57  GemmConfig_::AccumulatorsPerWarp::kH / GemmConfig_::InstructionShape::kH;
    59  typedef Shape<1, 1, kWmmasPerH> Iterations;
    60  // The iteration strides in the H/W dimension.
    61  typedef Shape<0, 0, 0> Delta;
    63  typedef EpilogueFunctor_ Functor;
    64 
    66  typedef WmmaGemmSharedStoreTileDTraits<
    67  // The output layout.
    69  // The pointer is float.
    70  typename Functor::Scalar,
    71  // The output tile size.
    72  typename GemmConfig_::OutputTile,
    73  // The number of warps.
    74  typename GemmConfig_::Warps,
    75  // The shape of the instruction.
    76  typename GemmConfig_::InstructionShape>
    77  SharedStoreTileTraits;
    78 
    79  typedef WmmaMatrix<GemmOperand::kC,
    81  Scalar,
    82  typename GemmConfig_::InstructionShape>
    83  WmmaMatrix;
    84 
    86  typedef TileStoreIterator<SharedStoreTileTraits,
    87  typename SharedStoreTileTraits::Scalar,
    90  Index_,
    91  WmmaMatrix,
    93  SharedStoreIteratorD;
    94 
    96  typedef Copy<typename SharedStoreIteratorD::Fragment> SharedStoreTransformerD;
    97 
    99  typedef WmmaGemmSharedLoadTileDTraits<
    100  // The pointer.
    101  typename Functor::Scalar,
    102  // The tile size.
    103  typename SharedStoreIteratorD::Tile,
    104  // The number of threads.
    105  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    106  // The number of scalars per LDS.
    107  GemmConfig_::kScalarsPerLdsD>
    108  SharedLoadTileTraits;
    109 
    111  typedef TileLoadIterator<SharedLoadTileTraits,
    112  typename SharedLoadTileTraits::Scalar,
    115  SharedLoadIteratorD;
    116 
    118  typedef WmmaGemmGlobalIteratorCdTraits<
    119  // The pointer is float const.
    120  typename GemmConfig_::ScalarC const,
    121  // The tile has size (N / Iterations)xM in GEMM's terminology.
    122  Shape<1,
    123  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
    124  GemmConfig_::OutputTile::kW>,
    125  // The threads are distributed as warps x 32 (the traits may reorganize).
    126  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    127  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    128  GemmConfig_::kScalarsPerLdgC>
    129  GlobalLoadTileTraits;
    130 
    132  typedef WmmaGemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> GlobalLoadIteratorC;
    134  typedef Copy<typename GlobalLoadIteratorC::Fragment> GlobalTransformerC;
    135 
    137  typedef WmmaGemmGlobalIteratorCdTraits<
    138  // The pointer is float.
    139  typename GemmConfig_::ScalarD,
    140  // The tile has size (N / Iterations)xM in GEMM's terminology.
    141  Shape<1,
    142  GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount,
    143  GemmConfig_::OutputTile::kW>,
    144  // The threads are distributed as warps x 32 (the traits may reorganize).
    145  Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>,
    146  // The number of scalars per STG (STG.32 or STG.128, etc).
    147  GemmConfig_::kScalarsPerStgD>
    148  GlobalStoreTileTraits;
    149 
    151  typedef WmmaGemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> GlobalStoreIteratorD;
    153  typedef Copy<typename GlobalStoreIteratorD::Fragment> GlobalTransformerD;
    154 };
    155 
    157 
    158 } // namespace gemm
    159 } // namespace cutlass
    160 
    161 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    Defines the Tile Traits concept and iterators for loading and storing to tiles efficiently.
    +
    Implements the BLAS linear scaling function alpha*AB + beta*C.
    +
    A Coord is a coordinate of arbitrary rank into a tensor or matrix.
    +
    Definition: tile_iterator.h:62
    +
    Definition: matrix_traits.h:43
    +
    Defines a type for restructuring a tile.
    +
    Definition: tile_iterator.h:67
    +
    Defines tile iterator traits for loading thread block-level tile from global memory.
    +
    static int const kCount
    The number of elements in the 4D space.
    Definition: shape.h:91
    +
    Definition: matrix_traits.h:36
    +
    Implements efficient loading of the thread block-level tile from global memory and storing to shared ...
    +
    Defines abstractions for managing loading and storing fragments to shared memory in the efficient GEM...
    +
    Defines conversion operations among Fragments of different base type.
    +
    Defines iterator traits for efficiently loading and storing fragment to and from shared memory...
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__global__tile_8h.html b/docs/generated-html/wmma__gemm__global__tile_8h.html new file mode 100644 index 0000000000..6c8b116f05 --- /dev/null +++ b/docs/generated-html/wmma__gemm__global__tile_8h.html @@ -0,0 +1,117 @@ + + + + + + + +Cutlass: wmma_gemm_global_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    + +
    +
    wmma_gemm_global_tile.h File Reference
    +
    +
    + +

    Defines tile iterator traits for loading thread block-level tile from global memory. +More...

    + +

    Go to the source code of this file.

    + + + + + + + + + + + + +

    +Classes

    struct  cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCdTraits< Scalar_, Tile_, Threads_, kAccessSize_ >::ThreadOffset
     Computes the thread offset in (H, W) based on thread ID. More...
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >
     
    struct  cutlass::gemm::WmmaGemmGlobalIteratorCd< TileTraits_, Index_ >::Params
     The params. More...
     
    + + + + + +

    +Namespaces

     cutlass
     
     cutlass::gemm
     
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__global__tile_8h_source.html b/docs/generated-html/wmma__gemm__global__tile_8h_source.html new file mode 100644 index 0000000000..4e58863d4a --- /dev/null +++ b/docs/generated-html/wmma__gemm__global__tile_8h_source.html @@ -0,0 +1,142 @@ + + + + + + + +Cutlass: wmma_gemm_global_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_global_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    31 
    32 namespace cutlass {
    33 namespace gemm {
    34 
    36 
    37 template <typename Scalar_, typename Tile_, typename Threads_, int kAccessSize_>
    38 struct WmmaGemmGlobalIteratorCdTraits : public GemmGlobalTileTraits<GemmOperand::kC,
    39  MatrixLayout::kColumnMajor,
    40  Scalar_,
    41  Tile_,
    42  Threads_,
    43  kAccessSize_> {
    47  Scalar_,
    48  Tile_,
    49  Threads_,
    50  kAccessSize_>
    52 
    55 
    57  struct ThreadOffset {
    59  Coord<4> operator()() const {
    60  int thread_offset_h = threadIdx.x / Base::Threads::kW;
    61  int thread_offset_w = threadIdx.x % Base::Threads::kW * Base::ThreadsDelta::kW;
    62 
    63  return make_Coord(0, thread_offset_h, thread_offset_w, 0);
    64  }
    65  };
    66 };
    67 
    69 
    70 template <typename TileTraits_, typename Index_ = int>
    71 struct WmmaGemmGlobalIteratorCd : public TileIteratorBase<TileTraits_,
    72  typename TileTraits_::Scalar,
    73  IteratorAdvance::kH,
    74  MemorySpace::kGlobal,
    75  Index_> {
    79  typedef TileTraits_ Traits;
    81  typedef TileIteratorBase<Traits,
    82  typename TileTraits_::Scalar,
    85  Index_>
    90  static MatrixLayout::Kind const kLayout = TileTraits_::kLayout;
    91 
    93  typedef typename TileTraits_::Scalar Scalar;
    95  typedef typename TileTraits_::Pointer Pointer;
    97  typedef typename TileTraits_::Threads Threads;
    99  typedef Index_ Index;
    101  typedef typename TileTraits_::ThreadOffset ThreadOffset;
    102 
    104  struct Params {
    115 
    118  Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w) {
    119  // The pointer.
    120  this->pointer = pointer;
    121  // Setup the base stride. One "group of threads" per column.
    122  stride_h = ld;
    123  // Each thread outputs 1 column per iteration.
    124  inc_h = ld * TileTraits_::Threads::kH;
    125  inc_advance = inc_h + epilogue_stride_w;
    126 
    127  predicate_offset = n;
    128  predicate_inc_h = TileTraits_::Threads::kH;
    129  predicate_inc_advance = predicate_inc_h + epilogue_delta_w;
    130 
    131  // It worked.
    132  return 0;
    133  }
    134  };
    135 
    137 
    139 
    141  CUTLASS_DEVICE WmmaGemmGlobalIteratorCd() {}
    142 
    144  CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const& params,
    145  const Coord<3>& bounds,
    146  const Coord<3>& block,
    147  int const pointer_offset = 0,
    148  int const pred_offset = 0,
    149  ThreadOffset thread_offset_func = ThreadOffset())
    150 
    151  : params(params) {
    152  thread_offset = thread_offset_func();
    153  // Each warp works on a different column of the tile.
    154  int const h = thread_offset[1] + block[1];
    155  // Each lane writes a different element.
    156  int const w = thread_offset[2] + block[2];
    157  // Setup the pointer.
    158  this->params.pointer += ((h * params.stride_h + w) + pointer_offset);
    159 
    160  // Prepare the vector of predicates.
    161  for (int i = 0; i < Base::Iterations::kW; ++i) {
    162  predicates.set(i, w + i * Base::Delta::kW < bounds[2]);
    163  }
    164  this->params.predicate_offset -= (h + pred_offset);
    165  }
    166 
    168  CUTLASS_DEVICE void inc_c() {}
    170  CUTLASS_DEVICE void inc_w() {}
    172  CUTLASS_DEVICE void inc_h() {
    175  }
    177  CUTLASS_DEVICE void inc_d() {}
    179  CUTLASS_DEVICE void inc_advance() {
    182  }
    183 
    185  CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const {
    186  return predicates.at(w) && params.predicate_offset > 0;
    187  }
    188 
    191  Pointer data() { return params.pointer; }
    192 
    194  Pointer const data() const { return params.pointer; }
    195 
    198 };
    199 
    201 
    202 } // namespace gemm
    203 } // namespace cutlass
    TileTraits_::Threads Threads
    The threads.
    Definition: wmma_gemm_global_tile.h:97
    +
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Definition: gemm_global_tile.h:70
    +
    CUTLASS_HOST_DEVICE bool at(int idx) const
    Accesses a bit within the predicate vector.
    Definition: predicate_vector.h:356
    +
    CUTLASS_DEVICE void inc_d()
    Increment the pointer in the D dimension.
    Definition: wmma_gemm_global_tile.h:177
    +
    Definition: load_store.h:43
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Index stride_h
    The stride in the H dimension to setup the thread in the block.
    Definition: wmma_gemm_global_tile.h:108
    +
    CUTLASS_DEVICE void inc_w()
    Increment the pointer in the W dimension.
    Definition: wmma_gemm_global_tile.h:170
    +
    Index_ Index
    The index.
    Definition: wmma_gemm_global_tile.h:99
    +
    TileTraits_::Scalar Scalar
    The scalar.
    Definition: wmma_gemm_global_tile.h:93
    +
    Definition: tile_iterator.h:62
    +
    Definition: matrix_traits.h:43
    +
    Params params
    Definition: wmma_gemm_global_tile.h:136
    +
    Index predicate_inc_h
    The strides to increment the predicate offset.
    Definition: wmma_gemm_global_tile.h:114
    +
    Pointer pointer
    The pointer.
    Definition: wmma_gemm_global_tile.h:106
    +
    CUTLASS_HOST_DEVICE Pointer const data() const
    Definition: wmma_gemm_global_tile.h:194
    +
    CUTLASS_DEVICE void inc_advance()
    Increment the pointer to move to the next iteration.
    Definition: wmma_gemm_global_tile.h:179
    +
    The params.
    Definition: wmma_gemm_global_tile.h:104
    +
    Index inc_h
    The strides to increment the pointer.
    Definition: wmma_gemm_global_tile.h:110
    +
    TileIteratorBase< Traits, typename TileTraits_::Scalar, IteratorAdvance::kH, MemorySpace::kGlobal, Index_ > Base
    The base class.
    Definition: wmma_gemm_global_tile.h:86
    +
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd()
    Ctor.
    Definition: wmma_gemm_global_tile.h:141
    +
    Index predicate_offset
    The column offset to compute the predicate for the columns.
    Definition: wmma_gemm_global_tile.h:112
    +
    CUTLASS_HOST_DEVICE Coord< 4 > operator()() const
    Definition: wmma_gemm_global_tile.h:59
    +
    Definition: wmma_gemm_global_tile.h:71
    +
    Index predicate_inc_advance
    Definition: wmma_gemm_global_tile.h:114
    +
    TileTraits_::Pointer Pointer
    The pointer.
    Definition: wmma_gemm_global_tile.h:95
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > Delta
    Override the strides in each dimension between different loads/stores.
    Definition: wmma_gemm_global_tile.h:54
    + +
    A Shape implementing Layout Concept describing the dimensions of a cube.
    Definition: shape.h:64
    +
    Coord< 4 > thread_offset
    Definition: wmma_gemm_global_tile.h:138
    +
    Index inc_advance
    Definition: wmma_gemm_global_tile.h:110
    +
    static MatrixLayout::Kind const kLayout
    The layout.
    Definition: wmma_gemm_global_tile.h:90
    + +
    Definition: wmma_gemm_global_tile.h:38
    +
    Iterator for accessing a stripmined tile in memory.
    Definition: tile_iterator.h:102
    +
    TileTraits_::ThreadOffset ThreadOffset
    The thread offset functor.
    Definition: wmma_gemm_global_tile.h:101
    +
    Definition: matrix_traits.h:36
    +
    CUTLASS_HOST_DEVICE Pointer data()
    Returns the raw pointer.
    Definition: wmma_gemm_global_tile.h:191
    +
    static int const kW
    The width of the cube.
    Definition: shape.h:70
    +
    CUTLASS_HOST_DEVICE void set(int idx, bool value=true)
    Set a bit within the predicate vector.
    Definition: predicate_vector.h:364
    +
    Kind
    Definition: matrix_traits.h:36
    +
    GemmGlobalTileTraits< GemmOperand::kC, MatrixLayout::kColumnMajor, Scalar_, Tile_, Threads_, kAccessSize_ > Base
    The base class.
    Definition: wmma_gemm_global_tile.h:51
    +
    CUTLASS_DEVICE WmmaGemmGlobalIteratorCd(Params const &params, const Coord< 3 > &bounds, const Coord< 3 > &block, int const pointer_offset=0, int const pred_offset=0, ThreadOffset thread_offset_func=ThreadOffset())
    Ctor.
    Definition: wmma_gemm_global_tile.h:144
    +
    WmmaGemmGlobalIteratorCd< TileTraits_, Index_ > This_
    This class.
    Definition: wmma_gemm_global_tile.h:77
    +
    cutlass::PredicateVector< Base::Iterations::kW > predicates
    The predicates for the row.
    Definition: wmma_gemm_global_tile.h:197
    +
    Shape< 0, 0, Base::Delta::kW, Base::Delta::kC > ImmediateOffsetStrides
    Override the strides in each dimension between different loads/stores.
    Definition: wmma_gemm_global_tile.h:88
    +
    Computes the thread offset in (H, W) based on thread ID.
    Definition: wmma_gemm_global_tile.h:57
    +
    CUTLASS_DEVICE void inc_c()
    Increment the pointer in the C dimension.
    Definition: wmma_gemm_global_tile.h:168
    +
    CUTLASS_DEVICE void inc_h()
    Increment the pointer in the H dimension.
    Definition: wmma_gemm_global_tile.h:172
    +
    TileTraits_ Traits
    The traits.
    Definition: wmma_gemm_global_tile.h:79
    +
    CUTLASS_DEVICE bool valid(int d, int h, int w, int c) const
    Test the predicate.
    Definition: wmma_gemm_global_tile.h:185
    +
    CUTLASS_HOST_DEVICE int initialize(Pointer pointer, Index ld, Index n, Index epilogue_stride_w, Index epilogue_delta_w)
    Setup the params.
    Definition: wmma_gemm_global_tile.h:117
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__multiply__add_8h.html b/docs/generated-html/wmma__gemm__multiply__add_8h.html new file mode 100644 index 0000000000..b81b0189b2 --- /dev/null +++ b/docs/generated-html/wmma__gemm__multiply__add_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_multiply_add.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_multiply_add.h File Reference
    +
    +
    + +

    Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__multiply__add_8h_source.html b/docs/generated-html/wmma__gemm__multiply__add_8h_source.html new file mode 100644 index 0000000000..399aa11fa7 --- /dev/null +++ b/docs/generated-html/wmma__gemm__multiply__add_8h_source.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: wmma_gemm_multiply_add.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_multiply_add.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 #include <cutlass/fragment.h>
    33 
    34 namespace cutlass {
    35 namespace gemm {
    36 
    38 
    39 template <MatrixLayout::Kind kLayoutA_,
    40  typename ScalarA_,
    41  MatrixLayout::Kind kLayoutB_,
    42  typename ScalarB_,
    43  MatrixLayout::Kind kLayoutC_,
    44  typename ScalarC_,
    45  typename AccumulatorsPerWarp_,
    46  typename InstructionShape_>
    47 struct WmmaGemmMultiplyAdd {
    49  typedef InstructionShape_ InstructionShape;
    51  typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp;
    53  typedef AccumulatorsPerWarp_ AccumulatorsPerWarp;
    55  typedef ScalarA_ ScalarA;
    57  typedef ScalarB_ ScalarB;
    59  typedef ScalarC_ ScalarC;
    62 
    64  typedef WmmaMatrix<GemmOperand::kA, kLayoutA_, ScalarA, InstructionShape> ElementA;
    66  typedef Fragment<ElementA, Iterations::kW> FragmentA;
    67 
    69  typedef WmmaMatrix<GemmOperand::kB, kLayoutB_, ScalarB, InstructionShape> ElementB;
    71  typedef Fragment<ElementB, Iterations::kH> FragmentB;
    72 
    74  typedef WmmaMatrix<GemmOperand::kC, kLayoutC_, ScalarC, InstructionShape> ElementC;
    76  typedef Fragment<ElementC, Iterations::kH * Iterations::kW> Accumulators;
    77 
    79  CUTLASS_DEVICE WmmaGemmMultiplyAdd() {}
    80 
    82  CUTLASS_DEVICE void multiply_add(FragmentA const& a,
    83  FragmentB const& b,
    84  Accumulators const& c,
    85  Accumulators& d) {
    86  for (int j = 0; j < Iterations::kH; ++j) {
    87  for (int i = 0; i < Iterations::kW; ++i) {
    88  // The input elements.
    89  ElementA const& elt_a = a[i];
    90  ElementB const& elt_b = b[j];
    91  ElementC const& elt_c = c[j * Iterations::kW + i];
    92 
    93  // The output element.
    94  ElementC& elt_d = d[j * Iterations::kW + i];
    95 
    96  // The wmma instruction.
    97  nvcuda::wmma::mma_sync(elt_d, elt_a, elt_b, elt_c);
    98  }
    99  }
    100  }
    101 };
    102 
    104 
    105 } // namespace gemm
    106 } // namespace cutlass
    107 
    108 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: convert.h:33
    +
    Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
    Definition: shape.h:126
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__shared__tile_8h.html b/docs/generated-html/wmma__gemm__shared__tile_8h.html new file mode 100644 index 0000000000..e72f3a867a --- /dev/null +++ b/docs/generated-html/wmma__gemm__shared__tile_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_shared_tile.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_shared_tile.h File Reference
    +
    +
    + +

    Defines iterator traits for efficiently loading and storing fragments to and from shared memory, specialized for WMMA GEMM. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__shared__tile_8h_source.html b/docs/generated-html/wmma__gemm__shared__tile_8h_source.html new file mode 100644 index 0000000000..fe56588250 --- /dev/null +++ b/docs/generated-html/wmma__gemm__shared__tile_8h_source.html @@ -0,0 +1,103 @@ + + + + + + + +Cutlass: wmma_gemm_shared_tile.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_shared_tile.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    29 #pragma once
    30 
    31 #include <cutlass/wmma_matrix.h>
    32 #ifdef CUTLASS_USE_WMMA_API
    33 
    35 #include <cutlass/reshape_tile.h>
    36 
    37 namespace cutlass {
    38 namespace gemm {
    39 
    40 template <class>
    41 struct Debug {};
    42 
    44 
    45 template <MatrixLayout::Kind kLayout_,
    46  typename Scalar_,
    47  typename Tile_,
    48  typename Warps_,
    49  int kWarpStride_,
    50  typename Iterations_,
    51  typename Delta_,
    52  typename WmmaShape_>
    53 struct WmmaGemmSharedLoadTileATraits {
    55  static GemmOperand::Kind const kOperand = GemmOperand::kA;
    57  static MatrixLayout::Kind const kLayout = kLayout_;
    59  typedef Scalar_ Scalar;
    61  typedef Scalar const* Pointer;
    63  static int const kAccessSize = 1;
    65  typedef Tile_ Tile;
    67  typedef Warps_ Warps;
    69  static int const kWarpStride = kWarpStride_;
    71  typedef Iterations_ Iterations;
    73  typedef Delta_ Delta;
    75  typedef Delta_ ImmediateOffsetStrides;
    77  typedef WmmaShape_ WmmaShape;
    79  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    81  struct ThreadOffset {
    83  Coord<4> operator()() const {
    84  // The warp id.
    85  int const warp = threadIdx.x / kWarpSize;
    86  // The offset.
    87  int const offset = warp % Warps::kW * kWarpStride;
    88  return make_Coord(0, 0, offset, 0);
    89  }
    90  };
    91 };
    92 
    94 
    95 template <MatrixLayout::Kind kLayout_,
    96  typename Scalar_,
    97  typename Tile_,
    98  typename Warps_,
    99  int kWarpStride_,
    100  typename Iterations_,
    101  typename Delta_,
    102  typename WmmaShape_>
    103 struct WmmaGemmSharedLoadTileBTraits {
    105  static GemmOperand::Kind const kOperand = GemmOperand::kB;
    107  static MatrixLayout::Kind const kLayout = kLayout_;
    109  typedef Scalar_ Scalar;
    111  typedef Scalar const* Pointer;
    113  static int const kAccessSize = 1;
    115  typedef Tile_ Tile;
    117  typedef Warps_ Warps;
    119  static int const kWarpStride = kWarpStride_;
    121  typedef Iterations_ Iterations;
    123  typedef Delta_ Delta;
    125  typedef Delta_ ImmediateOffsetStrides;
    127  typedef WmmaShape_ WmmaShape;
    129  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    131  struct ThreadOffset {
    133  Coord<4> operator()() const {
    134  // The warp id.
    135  int const warp = threadIdx.x / kWarpSize;
    136  // The offset.
    137  int const offset = warp / Warps::kW * kWarpStride;
    138  return make_Coord(0, 0, offset, 0);
    139  }
    140  };
    141 };
    142 
    144 
    145 template <MatrixLayout::Kind kLayout_,
    146  typename Scalar_,
    147  typename OutputTile_,
    148  typename Warps_,
    149  typename WmmaShape_,
    150  int kSkew_ = 0>
    151 struct WmmaGemmSharedStoreTileDTraits {
    153  static GemmOperand::Kind const kOperand = GemmOperand::kC;
    155  static MatrixLayout::Kind const kLayout = kLayout_;
    157  typedef Scalar_ Scalar;
    158  // The access size
    159  static int const kAccessSize = 1;
    161  typedef Scalar* Pointer;
    163  typedef Warps_ Warps;
    165  typedef WmmaShape_ WmmaShape;
    167  static int const kSkew = kSkew_;
    169  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    171  typedef Shape<1, Warps_::kH * WmmaShape_::kH, OutputTile_::kW + kSkew_> Tile;
    173  typedef Shape<1, 1, OutputTile_::kW / Warps::kW / WmmaShape_::kW> Iterations;
    175  typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> Delta;
    177  typedef Shape<0, 0, Warps::kW * WmmaShape_::kW, 0> ImmediateOffsetStrides;
    178 
    180  struct ThreadOffset {
    182  Coord<4> operator()() const {
    183  // The warp id.
    184  int const warp = threadIdx.x / kWarpSize;
    185  // The starting column.
    186  int const h = warp / Warps::kW * WmmaShape::kH;
    187  // The w.
    188  int const w = warp % Warps::kW * WmmaShape::kW;
    189  // The offset.
    190  int const offset = h * Tile::kW + w;
    191  return make_Coord(0, 0, offset, 0);
    192  }
    193  };
    194 };
    195 
    197 
    198 template <typename Scalar_, typename Tile_, typename Threads_, int kScalarsPerLds_>
    199 struct WmmaGemmSharedLoadTileDTraits {
    201  typedef Scalar_ Scalar;
    203  typedef Scalar const* Pointer;
    205  static int const kAccessSize = kScalarsPerLds_;
    207  typedef typename ReshapeTile<Tile_, kScalarsPerLds_>::Tile Tile;
    209  typedef typename ReshapeThreads<Tile, Threads_>::Threads Threads;
    211  typedef Shape<1, Tile::kW * Tile::kC, Tile::kC> ThreadsStrides;
    213  static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared;
    214 
    216  typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_> Delta;
    218  typedef Shape<0, Threads::kH * ShapeCount<Tile>::kWc, Threads::kW * kScalarsPerLds_>
    219  ImmediateOffsetStrides;
    221  typedef Shape<1, Tile::kH / Threads::kH, Tile::kW / Threads::kW, Tile::kC / kScalarsPerLds_>
    222  Iterations;
    223 
    225  struct ThreadOffset {
    227  Coord<4> operator()() const {
    228  // The offset.
    230  return make_Coord(0, 0, offset, 0);
    231  }
    232  };
    233 };
    234 
    236 
    237 } // namespace gemm
    238 } // namespace cutlass
    239 
    240 #endif // defined CUTLASS_USE_WMMA_API
    static CUTLASS_DEVICE int get()
    Definition: shape.h:253
    +
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    CUTLASS_HOST_DEVICE Coord< 1 > make_Coord(int _0)
    Helper to make a 1-element coordinate.
    Definition: coord.h:241
    +
    Definition: matrix_traits.h:43
    +
    Kind
    Definition: load_store.h:40
    +
    Defines a type for restructuring a tile.
    +
    Defines constant expressions for mapping GEMM problem size and strides onto pitch-linear memory...
    +
    Definition: matrix_traits.h:43
    +
    #define CUTLASS_HOST_DEVICE
    Definition: cutlass.h:46
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Tile_ Tile
    Definition: reshape_tile.h:43
    +
    Kind
    Definition: matrix_traits.h:43
    +
    Definition: matrix_traits.h:43
    +
    Threads_ Threads
    Definition: gemm_global_tile.h:54
    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__traits_8h.html b/docs/generated-html/wmma__gemm__traits_8h.html new file mode 100644 index 0000000000..37eb3547ba --- /dev/null +++ b/docs/generated-html/wmma__gemm__traits_8h.html @@ -0,0 +1,93 @@ + + + + + + + +Cutlass: wmma_gemm_traits.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_traits.h File Reference
    +
    +
    + +

    Defines structural properties of GEMM targeting the WMMA API in CUDA. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__gemm__traits_8h_source.html b/docs/generated-html/wmma__gemm__traits_8h_source.html new file mode 100644 index 0000000000..ad4cb28d1d --- /dev/null +++ b/docs/generated-html/wmma__gemm__traits_8h_source.html @@ -0,0 +1,109 @@ + + + + + + + +Cutlass: wmma_gemm_traits.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_gemm_traits.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #include <cutlass/wmma_matrix.h>
    31 #ifdef CUTLASS_USE_WMMA_API
    32 
    33 #include <cutlass/convert.h>
    34 #include <cutlass/gemm/gemm.h>
    43 
    44 namespace cutlass {
    45 namespace gemm {
    46 
    48 
    49 template <
    51  MatrixLayout::Kind kLayoutA_,
    53  MatrixLayout::Kind kLayoutB_,
    55  typename OutputTile_,
    57  typename ScalarC_,
    59  typename Accumulator_,
    61  typename AccumulatorsPerWarp_,
    63  typename InstructionShape_,
    65  int kScalarsPerLdgA_,
    67  int kScalarsPerLdgB_>
    68 struct WmmaGemmConfig : public GemmConfig<
    70  half,
    72  half,
    74  ScalarC_,
    76  ScalarC_,
    78  OutputTile_,
    80  WmmaGemmMultiplyAdd<kLayoutA_,
    81  half,
    82  kLayoutB_,
    83  half,
    84  MatrixLayout::kColumnMajor,
    85  Accumulator_,
    86  AccumulatorsPerWarp_,
    87  InstructionShape_>,
    89  kScalarsPerLdgA_,
    91  kScalarsPerLdgA_,
    93  8,
    95  kScalarsPerLdgB_,
    97  kScalarsPerLdgB_,
    99  8,
    101  16 / sizeof(ScalarC_),
    103  16 / sizeof(ScalarC_),
    105  16 / sizeof(ScalarC_),
    107  1> {};
    108 
    110 
    111 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    112 struct WmmaGemmTileTraitsHelperA {};
    113 
    115 
    116 template <typename GemmConfig_>
    117 struct WmmaGemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_>
    118  : public GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> {
    120  typedef GemmTileTraitsHelperA<MatrixLayout::kColumnMajor, GemmConfig_> Base;
    121 
    123  static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar);
    125  typedef Shape<GemmConfig_::kStages,
    126  GemmConfig_::OutputTile::kD,
    127  GemmConfig_::OutputTile::kW + kSkew>
    128  Tile;
    129 
    131  typedef WmmaMatrix<GemmOperand::kA,
    133  typename Base::MultiplyAddScalar,
    134  typename GemmConfig_::InstructionShape>
    135  WmmaMatrix;
    136 
    138  typedef GemmSharedStoreTileAbTraits<
    139  // The pointer.
    140  typename Base::MultiplyAddScalar,
    141  // The tile has size KxM in GEMM's terminology.
    142  Tile,
    143  // The threads are distributed as warps x 32 (the traits may reorganize).
    144  typename Base::GlobalTileTraits::Threads,
    145  // The number of scalars per STS (STS.32 or STS.128, etc).
    146  GemmConfig_::kScalarsPerStsA>
    147  SharedStoreTileTraits;
    148 
    150  static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW;
    152  static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD;
    154  typedef WmmaGemmSharedLoadTileATraits<
    155  // The layout of the matrix.
    157  // The pointer.
    158  typename Base::MultiplyAddScalar,
    159  // The output tile size.
    160  Tile,
    161  // The number of warps.
    162  typename GemmConfig_::Warps,
    163  // The strides between warps.
    164  GemmConfig_::InstructionShape::kW,
    165  // The number of iterations to load the data.
    166  Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>,
    167  // The stride between iterations.
    168  Shape<kScalarsPerIteration, 0, kScalarsPerW, 0>,
    169  // The shape of the instruction.
    170  typename GemmConfig_::InstructionShape>
    171  SharedLoadTileTraits;
    172 };
    173 
    175 
    // Tile traits for operand A of the WMMA GEMM when A is row-major. Bundles the
    // WMMA fragment type, the global-memory tile traits, the skewed shared-memory
    // tile, and the traits used to store to and load from that shared-memory tile.
    176 template <typename GemmConfig_>
    177 struct WmmaGemmTileTraitsHelperA<MatrixLayout::kRowMajor, GemmConfig_> {
         // The layout handled by this specialization.
    179  static MatrixLayout::Kind const kLayout = MatrixLayout::kRowMajor;
    180 
         // The scalar type of A (used below as the global-memory pointer type).
    182  typedef typename GemmConfig_::ScalarA Scalar;
         // The scalar type of A as seen by the multiply-add functor.
    184  typedef typename GemmConfig_::MultiplyAdd::ScalarA MultiplyAddScalar;
    185 
         // The WMMA fragment type holding a piece of A.
    187  typedef WmmaMatrix<GemmOperand::kA,
    189  MultiplyAddScalar,
    190  typename GemmConfig_::InstructionShape>
    191  WmmaMatrix;
    192 
         // The traits of the tile of A read from global memory.
    194  typedef GemmGlobalTileTraits<
    195  // That's A.
    197  // A is row-major.
    199  // The pointer is Scalar const.
    200  Scalar const,
    201  // The tile is OutputTile::kW x OutputTile::kD (H x W of the Shape).
    202  Shape<1, GemmConfig_::OutputTile::kW, GemmConfig_::OutputTile::kD>,
    203  // The threads are arranged as (kThreads / OutputTile::kD) x OutputTile::kD (the traits may reorganize).
    204  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    205  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    206  GemmConfig_::kScalarsPerLdgA>
    207  GlobalTileTraits;
    208 
         // The skew: the number of MultiplyAddScalar elements that fit in 16 bytes, added to
         // the W dimension of the shared-memory tile (presumably padding against
         // shared-memory bank conflicts -- TODO confirm).
    210  static int const kSkew = 16 / sizeof(MultiplyAddScalar);
         // The shared-memory tile: kStages x OutputTile::kW x (OutputTile::kD + kSkew).
    212  typedef Shape<GemmConfig_::kStages,
    213  GemmConfig_::OutputTile::kW,
    214  GemmConfig_::OutputTile::kD + kSkew>
    215  Tile;
    216 
         // The traits used to store A into shared memory.
    218  typedef GemmSharedStoreTileAbTraits<
    219  // The pointer.
    220  MultiplyAddScalar,
    221  // The skewed shared-memory tile defined above.
    222  Tile,
    223  // The thread arrangement is reused from the global tile traits (the traits may reorganize).
    224  typename GlobalTileTraits::Threads,
    225  // The number of scalars per STS (STS.32 or STS.128, etc).
    226  GemmConfig_::kScalarsPerStsA>
    227  SharedStoreTileTraits;
    228 
         // The extent along W covered by all warps in one iteration
         // (instruction width times the number of warps along W).
    230  static int const kScalarsPerW = GemmConfig_::InstructionShape::kW * GemmConfig_::Warps::kW;
         // The traits used to load A from shared memory into WMMA fragments.
    232  typedef WmmaGemmSharedLoadTileATraits<
    233  // The layout of the matrix.
    235  // The pointer.
    236  MultiplyAddScalar,
    237  // The tile in shared memory.
    238  Tile,
    239  // The number of warps.
    240  typename GemmConfig_::Warps,
    241  // The strides between warps.
    242  GemmConfig_::InstructionShape::kW * Tile::kW,
    243  // The number of iterations to load the data.
    244  Shape<1, 1, GemmConfig_::OutputTile::kW / kScalarsPerW>,
    245  // The stride between iterations.
    246  Shape<GemmConfig_::InstructionShape::kD, 0, kScalarsPerW * Tile::kW>,
    247  // The shape of the instruction.
    248  typename GemmConfig_::InstructionShape>
    249  SharedLoadTileTraits;
    250 };
    251 
    253 
    // Primary template of the tile-traits helper for operand B. It is empty here;
    // the definitions are supplied by partial specializations on the layout.
    254 template <enum MatrixLayout::Kind kLayout_, typename GemmConfig_>
    255 struct WmmaGemmTileTraitsHelperB {};
    256 
    258 
    // Tile traits for operand B of the WMMA GEMM when B is row-major. Inherits from
    // GemmTileTraitsHelperB for the same layout and redefines the shared-memory tile
    // (with skew), the WMMA fragment type and the shared-memory store/load traits.
    259 template <typename GemmConfig_>
    260 struct WmmaGemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_>
    261  : public GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> {
         // The base class.
    263  typedef GemmTileTraitsHelperB<MatrixLayout::kRowMajor, GemmConfig_> Base;
    264 
         // The skew: the number of MultiplyAddScalar elements that fit in 16 bytes, added to
         // the W dimension of the shared-memory tile (presumably padding against
         // shared-memory bank conflicts -- TODO confirm).
    266  static int const kSkew = 16 / sizeof(typename Base::MultiplyAddScalar);
         // The shared-memory tile: kStages x OutputTile::kD x (OutputTile::kH + kSkew).
    268  typedef Shape<GemmConfig_::kStages,
    269  GemmConfig_::OutputTile::kD,
    270  GemmConfig_::OutputTile::kH + kSkew>
    271  Tile;
    272 
         // The WMMA fragment type holding a piece of B.
    274  typedef WmmaMatrix<GemmOperand::kB,
    276  typename Base::MultiplyAddScalar,
    277  typename GemmConfig_::InstructionShape>
    278  WmmaMatrix;
    279 
         // The traits used to store B into shared memory.
    281  typedef GemmSharedStoreTileAbTraits<
    282  // The pointer.
    283  typename Base::MultiplyAddScalar,
    284  // The skewed shared-memory tile defined above.
    285  Tile,
    286  // The thread arrangement is reused from the base global tile traits (the traits may reorganize).
    287  typename Base::GlobalTileTraits::Threads,
    288  // The number of scalars per STS (STS.32 or STS.128, etc).
    289  GemmConfig_::kScalarsPerStsB>
    290  SharedStoreTileTraits;
    291 
         // The extent along H covered by all warps in one iteration
         // (instruction height times the number of warps along H).
    293  static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH;
         // The stride, in scalars, between iterations along D: one skewed tile row (Tile::kW)
         // per step of the instruction depth.
    295  static int const kScalarsPerIteration = Tile::kW * GemmConfig_::InstructionShape::kD;
         // The traits used to load B from shared memory into WMMA fragments.
    297  typedef WmmaGemmSharedLoadTileBTraits<
    298  // The layout of the matrix.
    300  // The pointer.
    301  typename Base::MultiplyAddScalar,
    302  // The tile in shared memory.
    303  Tile,
    304  // The number of warps.
    305  typename GemmConfig_::Warps,
    306  // The strides between warps.
    307  GemmConfig_::InstructionShape::kH,
    308  // The number of iterations to load the data.
    309  Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>,
    310  // The stride between iterations.
    311  Shape<kScalarsPerIteration, 0, kScalarsPerW, 0>,
    312  // The shape of the instruction.
    313  typename GemmConfig_::InstructionShape>
    314  SharedLoadTileTraits;
    315 };
    316 
    318 
    // Tile traits for operand B of the WMMA GEMM when B is column-major. Bundles the
    // WMMA fragment type, the global-memory tile traits, the skewed shared-memory
    // tile, and the traits used to store to and load from that shared-memory tile.
    319 template <typename GemmConfig_>
    320 struct WmmaGemmTileTraitsHelperB<MatrixLayout::kColumnMajor, GemmConfig_> {
         // The layout handled by this specialization.
    322  static MatrixLayout::Kind const kLayout = MatrixLayout::kColumnMajor;
    323 
         // The scalar type of B (used below as the global-memory pointer type).
    325  typedef typename GemmConfig_::ScalarB Scalar;
         // The scalar type of B as seen by the multiply-add functor.
    327  typedef typename GemmConfig_::MultiplyAdd::ScalarB MultiplyAddScalar;
    328 
         // The WMMA fragment type holding a piece of B.
    330  typedef WmmaMatrix<GemmOperand::kB,
    332  MultiplyAddScalar,
    333  typename GemmConfig_::InstructionShape>
    334  WmmaMatrix;
    335 
         // The traits of the tile of B read from global memory.
    337  typedef GemmGlobalTileTraits<
    338  // That's B.
    340  // B is column-major.
    342  // The pointer is Scalar const.
    343  Scalar const,
    344  // The tile is OutputTile::kH x OutputTile::kD (H x W of the Shape).
    345  Shape<1, GemmConfig_::OutputTile::kH, GemmConfig_::OutputTile::kD>,
    346  // The threads are arranged as (kThreads / OutputTile::kD) x OutputTile::kD (the traits may reorganize).
    347  Shape<1, GemmConfig_::kThreads / GemmConfig_::OutputTile::kD, GemmConfig_::OutputTile::kD>,
    348  // The number of scalars per LDG (LDG.32 or LDG.128, etc).
    349  GemmConfig_::kScalarsPerLdgB>
    350  GlobalTileTraits;
    351 
         // The skew: the number of MultiplyAddScalar elements that fit in 16 bytes, added to
         // the W dimension of the shared-memory tile (presumably padding against
         // shared-memory bank conflicts -- TODO confirm).
    353  static int const kSkew = 16 / sizeof(MultiplyAddScalar);
         // The shared-memory tile: kStages x OutputTile::kH x (OutputTile::kD + kSkew).
    355  typedef Shape<GemmConfig_::kStages,
    356  GemmConfig_::OutputTile::kH,
    357  GemmConfig_::OutputTile::kD + kSkew>
    358  Tile;
    359 
         // The traits used to store B into shared memory.
    361  typedef GemmSharedStoreTileAbTraits<
    362  // The pointer.
    363  MultiplyAddScalar,
    364  // The skewed shared-memory tile defined above.
    365  Tile,
    366  // The thread arrangement is reused from the global tile traits (the traits may reorganize).
    367  typename GlobalTileTraits::Threads,
    368  // The number of scalars per STS (STS.32 or STS.128, etc).
    369  GemmConfig_::kScalarsPerStsB>
    370  SharedStoreTileTraits;
    371 
         // The extent along H covered by all warps in one iteration
         // (instruction height times the number of warps along H).
    373  static int const kScalarsPerW = GemmConfig_::InstructionShape::kH * GemmConfig_::Warps::kH;
         // The traits used to load B from shared memory into WMMA fragments.
    375  typedef WmmaGemmSharedLoadTileBTraits<
    376  // The layout of the matrix.
    378  // The pointer.
    379  MultiplyAddScalar,
    380  // The tile in shared memory.
    381  Tile,
    382  // The number of warps.
    383  typename GemmConfig_::Warps,
    384  // The strides between warps.
    385  GemmConfig_::InstructionShape::kH * Tile::kW,
    386  // The number of iterations to load the data.
    387  Shape<1, 1, GemmConfig_::OutputTile::kH / kScalarsPerW>,
    388  // The stride between iterations.
    389  Shape<GemmConfig_::InstructionShape::kD, 0, kScalarsPerW * Tile::kW>,
    390  // The shape of the instruction.
    391  typename GemmConfig_::InstructionShape>
    392  SharedLoadTileTraits;
    393 };
    394 
    396 
    // Assembles the building blocks of a WMMA GEMM from its template parameters: the
    // WmmaGemmConfig, the per-operand tile-traits helpers, the streams that move A and B
    // from global memory to shared memory, the streams that load A and B from shared
    // memory, the multiply-add functor, the accumulator-clearing functor and the epilogue.
    397 template <
         // The layout of A (selects the WmmaGemmTileTraitsHelperA specialization).
    399  MatrixLayout::Kind kLayoutA_,
         // The layout of B (selects the WmmaGemmTileTraitsHelperB specialization).
    401  MatrixLayout::Kind kLayoutB_,
         // The output tile, forwarded to WmmaGemmConfig.
    403  typename OutputTile_,
         // The scalar type of C, forwarded to WmmaGemmConfig.
    405  typename ScalarC_,
         // The accumulator type, forwarded to WmmaGemmConfig.
    407  typename Accumulator_,
         // The functor used by the epilogue.
    409  typename EpilogueFunctor_,
         // The accumulators-per-warp shape, forwarded to WmmaGemmConfig.
    411  typename AccumulatorsPerWarp_,
         // The shape of the WMMA instruction, forwarded to WmmaGemmConfig.
    413  typename InstructionShape_,
         // The number of scalars per LDG for A.
    415  int kScalarsPerLdgA_,
         // The number of scalars per LDG for B.
    417  int kScalarsPerLdgB_,
         // The index type used by the iterators and the epilogue.
    419  typename Index_>
    420 struct WmmaGemmTraitsHelper {
         // The WMMA GEMM configuration.
    422  typedef WmmaGemmConfig<kLayoutA_,
    423  kLayoutB_,
    424  OutputTile_,
    425  ScalarC_,
    426  Accumulator_,
    427  AccumulatorsPerWarp_,
    428  InstructionShape_,
    429  kScalarsPerLdgA_,
    430  kScalarsPerLdgB_>
    431  GemmConfig;
    432 
         // The tile-traits helper for A.
    434  typedef WmmaGemmTileTraitsHelperA<kLayoutA_, GemmConfig> GemmTileTraitsHelperA;
         // The tile-traits helper for B.
    436  typedef WmmaGemmTileTraitsHelperB<kLayoutB_, GemmConfig> GemmTileTraitsHelperB;
    437 
         // The iterator that loads A from global memory.
    439  typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperA::GlobalTileTraits, Index_>
    440  GlobalLoadIteratorA;
         // The default transformer for A (a plain copy of the loaded fragment).
    442  typedef Copy<typename GlobalLoadIteratorA::Fragment> GlobalTransformerA;
         // The iterator that stores A to shared memory.
    444  typedef TileStoreIterator<typename GemmTileTraitsHelperA::SharedStoreTileTraits,
    445  typename GemmTileTraitsHelperA::SharedStoreTileTraits::Scalar,
    448  SharedStoreIteratorA;
         // The stream that loads A from global memory and stores it to shared memory.
    450  typedef GlobalLoadStream<GlobalLoadIteratorA, SharedStoreIteratorA, GlobalTransformerA>
    451  GlobalLoadStreamA;
    452 
         // The iterator that loads B from global memory.
    454  typedef GemmGlobalIteratorAb<typename GemmTileTraitsHelperB::GlobalTileTraits, Index_>
    455  GlobalLoadIteratorB;
    456  // The default transformer for B.
    457  typedef Copy<typename GlobalLoadIteratorB::Fragment> GlobalTransformerB;
         // The iterator that stores B to shared memory.
    459  typedef TileStoreIterator<typename GemmTileTraitsHelperB::SharedStoreTileTraits,
    460  typename GemmTileTraitsHelperB::SharedStoreTileTraits::Scalar,
    463  SharedStoreIteratorB;
         // The stream that loads B from global memory and stores it to shared memory.
    465  typedef GlobalLoadStream<GlobalLoadIteratorB, SharedStoreIteratorB, GlobalTransformerB>
    466  GlobalLoadStreamB;
    467 
         // The iterator that loads A from shared memory into WMMA fragments.
    469  typedef TileLoadIterator<typename GemmTileTraitsHelperA::SharedLoadTileTraits,
    470  typename GemmTileTraitsHelperA::SharedLoadTileTraits::Scalar,
    473  Index_,
    474  typename GemmTileTraitsHelperA::WmmaMatrix,
    476  SharedLoadIteratorA;
         // The stream that loads A from shared memory.
    478  typedef SharedLoadStream<SharedLoadIteratorA> SharedLoadStreamA;
         // The iterator that loads B from shared memory into WMMA fragments.
    480  typedef TileLoadIterator<typename GemmTileTraitsHelperB::SharedLoadTileTraits,
    481  typename GemmTileTraitsHelperB::SharedLoadTileTraits::Scalar,
    484  Index_,
    485  typename GemmTileTraitsHelperB::WmmaMatrix,
    487  SharedLoadIteratorB;
         // The stream that loads B from shared memory.
    489  typedef SharedLoadStream<SharedLoadIteratorB> SharedLoadStreamB;
    490 
         // The functor that performs the multiply-add, taken from the configuration.
    492  typedef typename GemmConfig::MultiplyAdd MultiplyAdd;
         // The functor that clears the accumulators.
    494  typedef ClearAccumulators<typename MultiplyAdd::ScalarC> ClearAccumulators;
    495 
         // The helper that builds the epilogue traits.
    497  typedef WmmaGemmEpilogueTraitsHelper<GemmConfig, EpilogueFunctor_, Index_> EpilogueTraitsHelper;
         // The epilogue traits.
    499  typedef SimplifiedGemmEpilogueTraits<GemmConfig, EpilogueFunctor_, Index_, EpilogueTraitsHelper>
    500  GemmEpilogueTraits;
         // The epilogue.
    502  typedef GemmEpilogue<GemmEpilogueTraits> Epilogue;
    503 };
    504 
    506 
    // Computes the default accumulators-per-warp shape: ShapeMin of the output tile and
    // DefaultShape_ (Shape<64, 32, 64> unless overridden).
    507 template <typename OutputTile_, typename DefaultShape_ = Shape<64, 32, 64> >
    508 struct WmmaGemmAccumulatorsPerWarp {
         // The resulting shape.
    509  typedef typename ShapeMin<OutputTile_, DefaultShape_>::Shape Shape;
    510 };
    511 
    513 
    // The traits of a WMMA GEMM. Only the two layouts are mandatory; every other
    // parameter has a default. The struct adds no members of its own: it derives from
    // GemmTraits instantiated with the types assembled by Helper_.
    514 template <
         // The layout of A.
    516  MatrixLayout::Kind kLayoutA_,
         // The layout of B.
    518  MatrixLayout::Kind kLayoutB_,
         // The output tile.
    520  typename OutputTile_ = Shape<64, 128, 128>,
         // The scalar type of C.
    522  typename ScalarC_ = float,
         // The functor used by the epilogue.
    524  typename EpilogueFunctor_ = LinearScaling<ScalarC_>,
         // The accumulator type (defaults to the type of C).
    526  typename Accumulator_ = ScalarC_,
         // The accumulators-per-warp shape (defaults to the ShapeMin-based helper above).
    528  typename AccumulatorsPerWarp_ = typename WmmaGemmAccumulatorsPerWarp<OutputTile_>::Shape,
         // The shape of the WMMA instruction.
    530  typename InstructionShape_ = Shape<16, 16, 16>,
         // The number of scalars per LDG for A.
    532  int kScalarsPerLdgA_ = 8,
         // The number of scalars per LDG for B.
    534  int kScalarsPerLdgB_ = 8,
         // The index type.
    536  typename Index_ = int,
         // The helper that assembles the component types; can be replaced to customize them.
    538  typename Helper_ = WmmaGemmTraitsHelper<kLayoutA_,
    539  kLayoutB_,
    540  OutputTile_,
    541  ScalarC_,
    542  Accumulator_,
    543  EpilogueFunctor_,
    544  AccumulatorsPerWarp_,
    545  InstructionShape_,
    546  kScalarsPerLdgA_,
    547  kScalarsPerLdgB_,
    548  Index_> >
    549 struct WmmaGemmTraits : public GemmTraits<
    550  // The config.
    551  typename Helper_::GemmConfig,
    552  // The stream to load A from global memory to shared memory.
    553  typename Helper_::GlobalLoadStreamA,
    554  // The stream to load B from global memory to shared memory.
    555  typename Helper_::GlobalLoadStreamB,
    556  // The stream to load A from shared memory.
    557  typename Helper_::SharedLoadStreamA,
    558  // The stream to load B from shared memory.
    559  typename Helper_::SharedLoadStreamB,
    560  // The epilogue.
    561  typename Helper_::Epilogue,
    562  // The block swizzle to reorganize the grid.
    563  IdentityBlockSwizzle,
    564  // The index.
    565  Index_,
    566  // The tool used to clear accumulators.
    567  typename Helper_::ClearAccumulators> {};
    568 
    570 
    571 } // namespace gemm
    572 } // namespace cutlass
    573 
    574 #endif // defined CUTLASS_USE_WMMA_API
    Abstractions for loading and storing matrices using the CUDA WMMA API.
    +
    MultiplyAdd_ MultiplyAdd
    The functor to do D = A*B + C.
    Definition: gemm_traits.h:93
    +
    Definition: load_store.h:42
    +
    Definition: convert.h:33
    +
    Defines iterators for efficiently loading and storing to global memory.
    +
    Defines structural properties of complete GEMM computation.
    +
    Defines structural properties of WMMA GEMM&#39;s epilogue phase.
    +
    Definition: tile_iterator.h:62
    +
    Implements the epilogue phase of the GEMM kernel that efficiently updates global memory with the comp...
    +
    Defines iterators for efficiently loading and storing tiles to and from shared memory.
    +
    Definition: matrix_traits.h:36
    +
    Definition: tile_iterator.h:67
    +
    Definition: matrix_traits.h:43
    +
    Defines tile iterator traits for loading thread block-level tile from global memory.
    +
    Definition: matrix_traits.h:36
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Implements warp-level matrix multiply-accumulate operation using CUDA WMMA API.
    +
    Definition: matrix_traits.h:43
    +
    Implements a software-pipelined efficient GEMM.
    +
    Defines structural properties of the GEMM epilogue.
    +
    Shape<(A_::kD< B_::kD ? A_::kD :B_::kD),(A_::kH< B_::kH ? A_::kH :B_::kH),(A_::kW< B_::kW ? A_::kW :B_::kW),(A_::kC< B_::kC ? A_::kC :B_::kC)> Shape
    Definition: shape.h:148
    +
    Defines conversion operations among Fragments of different base type.
    +
    + + + + diff --git a/docs/generated-html/wmma__matrix_8h.html b/docs/generated-html/wmma__matrix_8h.html new file mode 100644 index 0000000000..fd1ab8cf9f --- /dev/null +++ b/docs/generated-html/wmma__matrix_8h.html @@ -0,0 +1,92 @@ + + + + + + + +Cutlass: wmma_matrix.h File Reference + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_matrix.h File Reference
    +
    +
    + +

    Abstractions for loading and storing matrices using the CUDA WMMA API. +More...

    + +

    Go to the source code of this file.

    +
    + + + + diff --git a/docs/generated-html/wmma__matrix_8h_source.html b/docs/generated-html/wmma__matrix_8h_source.html new file mode 100644 index 0000000000..9e91b6859f --- /dev/null +++ b/docs/generated-html/wmma__matrix_8h_source.html @@ -0,0 +1,96 @@ + + + + + + + +Cutlass: wmma_matrix.h Source File + + + + + + + + + + +
    +
    + + + + + + +
    +
    Cutlass +
    +
    CUDA Templates for Linear Algebra Subroutines and Solvers
    +
    +
    + + + + + + + + +
    +
    + + +
    + +
    + + +
    +
    +
    +
    wmma_matrix.h
    +
    +
    +Go to the documentation of this file.
    1 /***************************************************************************************************
    2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without modification, are permitted
    5  * provided that the following conditions are met:
    6  * * Redistributions of source code must retain the above copyright notice, this list of
    7  * conditions and the following disclaimer.
    8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
    9  * conditions and the following disclaimer in the documentation and/or other materials
    10  * provided with the distribution.
    11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
    12  * to endorse or promote products derived from this software without specific prior written
    13  * permission.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
    17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
    20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    23  *
    24  **************************************************************************************************/
    28 #pragma once
    29 
    30 #if defined(__CUDACC__) && (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700)
    31 
    32 // Dependent header files should use the following macro to guard all code using
    33 // nvcuda::wmma:: to enable compilation for CUDA Compute Capabilities < sm_70.
    34 // Earlier shader models do not support Tensor Cores.
    35 #define CUTLASS_USE_WMMA_API
    36 
    37 #include "stdio.h"
    38 
    39 #include <crt/mma.h>
    40 #include <cutlass/fragment.h>
    41 #include <cutlass/load_store.h>
    42 #include <cutlass/matrix_traits.h>
    43 #include <cutlass/shape.h>
    44 #include <cutlass/vector.h>
    45 
    46 namespace cutlass {
    47 
    49 
    // Maps a MatrixLayout::Kind to the corresponding nvcuda::wmma layout tag. The primary
    // template yields col_major; row-major is handled by a specialization.
    51 template <MatrixLayout::Kind kLayout_>
    52 struct WmmaLayout {
    53  typedef nvcuda::wmma::col_major Layout;
    54 };
    55 
    // Specialization of WmmaLayout for row-major matrices: yields nvcuda::wmma::row_major.
    57 template <>
    58 struct WmmaLayout<MatrixLayout::kRowMajor> {
    59  typedef nvcuda::wmma::row_major Layout;
    60 };
    61 
    63 
    // Primary template of the wrapper around nvcuda::wmma::fragment. It is empty here;
    // the definitions are supplied by partial specializations on the GEMM operand.
    65 template <GemmOperand::Kind kOperand_,
    66  MatrixLayout::Kind kLayout_,
    67  typename Scalar_,
    68  typename WmmaShape_>
    69 struct WmmaMatrix {};
    70 
    72 
    // WMMA fragment for operand A: derives from nvcuda::wmma::fragment<matrix_a, ...> with
    // dimensions (WmmaShape_::kW, WmmaShape_::kH, WmmaShape_::kD) and the layout tag
    // selected by WmmaLayout<kLayout_>.
    74 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    75 struct WmmaMatrix<GemmOperand::kA, kLayout_, Scalar_, WmmaShape_>
    76  : public nvcuda::wmma::fragment<
    78  nvcuda::wmma::matrix_a,
    80  WmmaShape_::kW,
    81  WmmaShape_::kH,
    82  WmmaShape_::kD,
    84  Scalar_,
    86  typename WmmaLayout<kLayout_>::Layout> {
        // This type.
    88  typedef WmmaMatrix<GemmOperand::kA, kLayout_, Scalar_, WmmaShape_> This_;
    89 
        // Fills every element of the fragment with x and returns *this.
    91  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    92  nvcuda::wmma::fill_fragment(*this, x);
    93  return *this;
    94  }
    95 
        // Loads the fragment from memory at pointer, using stride as the leading dimension.
    97  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    98  nvcuda::wmma::load_matrix_sync(*this, pointer, stride);
    99  }
    100 
        // Stores the fragment to memory at pointer.
        // NOTE(review): the CUDA WMMA API documents store_matrix_sync only for accumulator
        // fragments (with an explicit layout argument); this member presumably fails to
        // compile if it is ever instantiated -- confirm.
    102  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    103  nvcuda::wmma::store_matrix_sync(pointer, *this, stride);
    104  }
    105 };
    106 
    108 
    // WMMA fragment for operand B: derives from nvcuda::wmma::fragment<matrix_b, ...> with
    // dimensions (WmmaShape_::kW, WmmaShape_::kH, WmmaShape_::kD) and the layout tag
    // selected by WmmaLayout<kLayout_>.
    110 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    111 struct WmmaMatrix<GemmOperand::kB, kLayout_, Scalar_, WmmaShape_>
    112  : public nvcuda::wmma::fragment<
    114  nvcuda::wmma::matrix_b,
    116  WmmaShape_::kW,
    117  WmmaShape_::kH,
    118  WmmaShape_::kD,
    120  Scalar_,
    122  typename WmmaLayout<kLayout_>::Layout> {
         // This type.
    124  typedef WmmaMatrix<GemmOperand::kB, kLayout_, Scalar_, WmmaShape_> This_;
    125 
         // Fills every element of the fragment with x and returns *this.
    127  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    128  nvcuda::wmma::fill_fragment(*this, x);
    129  return *this;
    130  }
    131 
         // Loads the fragment from memory at pointer, using stride as the leading dimension.
    133  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    134  nvcuda::wmma::load_matrix_sync(*this, pointer, stride);
    135  }
    136 
         // Stores the fragment to memory at pointer.
         // NOTE(review): the CUDA WMMA API documents store_matrix_sync only for accumulator
         // fragments (with an explicit layout argument); this member presumably fails to
         // compile if it is ever instantiated -- confirm.
    138  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    139  nvcuda::wmma::store_matrix_sync(pointer, *this, stride);
    140  }
    141 };
    142 
    144 
    // WMMA fragment for the accumulator (operand C): derives from
    // nvcuda::wmma::fragment<accumulator, ...> with dimensions (WmmaShape_::kW,
    // WmmaShape_::kH, WmmaShape_::kD). The fragment type itself carries no layout tag;
    // the layout is kept in kLayout and passed explicitly to load and store.
    146 template <MatrixLayout::Kind kLayout_, typename Scalar_, typename WmmaShape_>
    147 struct WmmaMatrix<GemmOperand::kC, kLayout_, Scalar_, WmmaShape_>
    148  : public nvcuda::wmma::fragment<
    150  nvcuda::wmma::accumulator,
    152  WmmaShape_::kW,
    153  WmmaShape_::kH,
    154  WmmaShape_::kD,
    156  Scalar_> {
         // This type.
    158  typedef WmmaMatrix<GemmOperand::kC, kLayout_, Scalar_, WmmaShape_> This_;
         // The layout used when loading from or storing to memory.
    160  static MatrixLayout::Kind const kLayout = kLayout_;
    161 
         // Fills every element of the fragment with x and returns *this.
    163  CUTLASS_DEVICE This_& operator=(Scalar_ const& x) {
    164  nvcuda::wmma::fill_fragment(*this, x);
    165  return *this;
    166  }
    167 
         // Loads the fragment from memory at pointer with leading dimension stride; the
         // memory layout passed to WMMA (mem_row_major or mem_col_major) follows kLayout.
    169  CUTLASS_DEVICE void load(Scalar_ const* pointer, int const stride) {
    170  bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor;
    171  nvcuda::wmma::load_matrix_sync(
    172  *this,
    173  pointer,
    174  stride,
    175  kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major);
    176  }
    177 
         // Stores the fragment to memory at pointer with leading dimension stride; the
         // memory layout passed to WMMA (mem_row_major or mem_col_major) follows kLayout.
    179  CUTLASS_DEVICE void store(Scalar_* pointer, int const stride) const {
    180  bool const kIsRowMajor = kLayout == MatrixLayout::kRowMajor;
    181  nvcuda::wmma::store_matrix_sync(
    182  pointer,
    183  *this,
    184  stride,
    185  kIsRowMajor ? nvcuda::wmma::mem_row_major : nvcuda::wmma::mem_col_major);
    186  }
    187 };
    188 
    190 
    191 } // namespace cutlass
    192 
    193 #endif // defined CUTLASS_USE_WMMA_API
    Definition: convert.h:33
    +
    Definition: matrix_traits.h:36
    +
    Defines abstractions for efficiently loading and storing vectors to memory.
    +
    Defines a 1D vector of elements held in the registers of each thread.
    +
    Kind
    Definition: matrix_traits.h:36
    +
    Kind
    Definition: matrix_traits.h:43
    +
    Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.
    +
    Defines properties of matrices used to denote layout and operands to GEMM kernels.
    +
    Defines Fragment, a statically-sized array for storing parts of matrices within a thread&#39;s registers...
    +
    + + + + diff --git a/media/cutlass-performance-plot.png b/media/cutlass-performance-plot.png deleted file mode 100644 index 96171ed0dd9267f1ab7177f440ebb2a063a5ddce..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40183 zcmb@uWk8f`^!7{XPy!OtDS{wf5<_d}N=SEybTf3rxySvU z_nfci{htqe?}05c&&+eLwXWZFtqFOnDE$PT1RVhZ;fbt_#5)88q%Z^o#1YiT;5*o8 zep=u^h<5L!#Slsc$+p24D8{01L=h0mBQS3D9)YjXzQ}0UAs|rm!2dz~OX-n@fRGX@ zDQ&JfDIziVSnUSKUh~o>y4Q=& z*{9oM-QfdS^6@Okr(;U;ch_e>)Jt_n%MGOKP6h?`i;Z25eiOccLbWJd)}CtoG7NEl zxVt!R$6(#6Cg%LyGngvy;`wuN4o=P=;J4ULX*({Yu+W0_>JP_O3e>r2ba`{DNP?gcx$GR&;~`lm+4p5Td85El8V z(5GIEC69~L#rUg}&D}F+TtY&6@bqFr@aW~@7}dBAV1GL}KKB#VT`an-ZzNk!@+xig zCq7U2I8Q`*{CKBo-t%Vv{5#X4%O<%-gJW^67E4ph#k^fa)B3FlX3N9(Qn_5q{ZXQ`O8_NyrGTQ8=Pmc`_MkVI6 z&Czn*{y`GN<#2mxL(F55pk?!$fMgS`#ArA}G&(kx7Q6^5$*9yDB1+Gxh5k@_-DU<{ zgTIMEI-IQBpyKjuO7{gdwdholdT9v_4gLEPEru`>?$NUtdZj`H+vx_U(yutE&1r&e za=*V4s4wQkJRe>#LXm6{qv;VL6^xr}`0 zwp(2Dr(#Ixm37nE_a?QqMB}umnF`~geVn?|=Ig@7<3F#?CsSNtZ4jeN*k9ZAIQ2hw z7fYoV2V+XYagO`5Gj4`x8o^fc-6h%$JEPgs*ltBV*uu9NNZ2dv^i(gs?yroiFt*$O z=B0Z^kB^U^=sZv3K0x*q=HZzPf8n2iBne(7^hVwv^6~GLM)H-j_okVaCyr10{w!WB zIHliSu10v_a+AKcilveK+T$>#ou`NT)7u+C8^SBx`4sw%S+g>-%@0!KgptOJN*=V= z&DVH$KF6WoiF&5m9^)*u{3IX`lVXg7+f2s9B%jNCQo*WWpNBy6fvfC?QQ!CDMYqEl zc4Lhyv$(GUFz_ztU(PnVROr|_md=6~*P%_Y;cp^6k-uMWK)%&iFsVuL#z ztY<5G!v!%K8$52#CmiU>5w!?M@laks!)Fo&oJvj)mvjYFW)2~*s!)$SUXq|4>Aiiz zYrplj_`Q^agW~v44V6FftfgtfN>tudlPnUBNAEJl9&L{o#QgkF-X7jd9E`5IE`};r z%R7;^_@%I2@alZdy2|V>$-G3l^kreVplBRJwVumL2>sr)FsJD#EkWiJQl8kZFw(xu zqt$(8ug5w9&U4FQcCAfa9*%_(HZ0vLn|0~P{^X55X^V=^YBm(A1lCK<+JZ&3d04e9 z!Ij`Lz>60W5uqMI<8Pptapg^)rq3!++l|v*h9@?J=NzslP}mR?6SF_f1apT``|N7f ze#sNA;Xz&|CK~poy)t<`m*Ea^GWh3A2 zKD^3reFdr6uc?TrP^mo!A&o{bZ^YpK(w2M8ckkYDr4YVt68zMPaMmh{-(i6Ecf*?gGgoKQo2hRlPjAThp-0W(+*z?|%B9))_f}ol?N0mGCJoToMAUFa= z)aZDvmqV`|rAat#+0*`DUL%=$SxBJu1nn=x_SiXV2Dz}l!pna-F@uE6cYLhqa#WsG zw&2~5Q_a7r0fZ?yE6H!UaP<*PRv$-|eI6hY*w9+$7Hg1JwM4>S#1VGb86Wwg^QAD$ 
z?zn_+`F4v9$!w~mI14fBRjCMmyZED0Vibx_y|LUkXSWDx%)zX|ANDxEEQ^EPxXACkFf32De?BZu+CH*9@NGwpX{+kC z6&c$El5tXQwZMPY!#^<(D-j(|!9L{;BE*_#$NBK#;(h9&YzXd)d)l9W>--YLBhFTS zf7^8tY~$HOj!Gp@!M<5_ML;>JwwxPZg^X-n^&d6hbeV|r8TQ~5w_lvTb@eSY$blf} zMPMo6jBj=v(X8mKBqFG)3G6FXd{a4X4-9&CK}Px7IoBU&$scrRaS>Ky{OPT;Z;XR` z86EvWl%Dy!IQA|_NiM%!&vwXyKH7-5NmsgwCkv^Dh)JxtQf)0q{du)D5Nfvm2;XK6_Q15*a{;Mp0BY|2o zkECREuCoroB72KXKIm7tyO%0UWvJ{BMsYYJYD|L4Bfac`bf|NV9f0BDD_FOkn>_A@ zszluwny)|jWPh$Ee#!IZjionL%W#>TvyO?T_l}cl2%eZb-d8Cx%$1{V%J_I)Pd;f@ zDs&fTm*K7_Y3cGTb)Ad0p1<5y*t9TRF*+*Z@Ng(`G`4;sW_p{82H{nj`#CY^QS=fb z3DyYTU#{9Us^D}HT|eUWLUg^!swbE@oe`aY7EYwH<0@BS*d%J3fOZ5)gH}Qu_*>Ud z29bx-=}ibxl^;dzckT!2o_Q06A+`wctYbExw&TAX`Nw4s57;Z*~pMU;`Vxa9C4V%%Hnd8vLuFe@|Y0LlcE6L}msz=5a@KB*j>^rIx0a)KOw(-+q4s7ROKFsy zhIFr_V*-vWPQlit-+49` z&Q~H|)TxvO7KB3ze};pP=f*D7RBY5pM-9AK)w~(}6a$D*!Gcwn(ec4q33}wOI2EL!$aaF! z%AdQ?p#Xjn$ts7Gq@?Wj=nn@?;r<<8mk?c-b%KSZ7z#nxt&A}uyWziAo9R@b!f;aZ+_ZMeY@q)XN#`^_t2F>Z15XxDDP%lS5gJp`D4G#yhH^gq*^> zk5{`A5?m6G*# zgoJ;I*U7+Z*XG+++)h`kv8~Y`-Vv!hhUF0lC8-FC{c8Wro1)U@QP@bf^n#$<`M5)w@eqYu7|zNo`1?m`u-X;I ze-gs!#Pu20S|67_lt_TW>BU=NkDz+tI*5a`{GuN#m)IX-(;ZIk7`heuGC>ZTw<5Up zqkp0T!e_K2=6#yhlFl^>R|6Z?u%!gGWK!qSLHP5!aVEIOMjEhvn>RZ&}XsmUV;yy_Xlv1VS|wN zsh`<%r4y}*!kgHREtRT-^g>7=!4#rGy~3v#ZrM-E=?wd{5XiNzkRDvQmO~FUH{KxX zG*VJW;6(pR!LK5%FRT^c5vQ(WTKZJX{c&8lKTbO0B1I8ZwYafH#pf`$#YPv$o?;yl z`Pymfnj0|rY&yf1iT7r!ipc-8|G!e zRZ92k9d_rzXjtFH=j>|u*WouGw7fIr2HaTn6a)n8)t$K|eqajN(cRSfxT5{w3reN&Ro7x`ZAQdbOe2vEdICvZXWxO`qSam z{+GC?T(~O+8E>dJ1P8Gi71GiqoxO143E3jpHs0K?$Lr?UuK(F#xAUeS#Lb;m#I@t? 
z;N&s-gMW4p`r}qVbfVR@#$-+w;vDDsaOZTjU$a!HO$qhn#j<;fTv={^9(3{agu#!} z^HQ{=<=#~rk^mu2<4swz^EBU!;PFPFU^5olhQYp^xgu3A_;2o}IxX^X(sRtZWNA}~ zBt!exG3P4MNf)K$nM@lLoo|<2&QLb%PDkY($h$jY&<+{*>`Q(hb?GeuMWB4&p5GqT`(9&QSm%k&cWrX~ocb=uMDg7=oRxz|^CXrt?|OO=Oeg zt;%ea%*;$PYsU!gR>9WBHJFTMmR3_%E9U@TtAJLaDuc&+n1aE*)o_k1-dVJc+s6|q}2Uu_7hxgeop+6#U07O^WV84aq-Q2vk>&OL7eA{8Fo_KYYu-woZz3Kv-&pN?uh^5c-F4aE zq~Tayk}Hu3AfWs$H|Vw}-&<%~5YO6jo1ZH)&M?CMhGw-z0S&6o zTjvWZ%`ccxz3IT4N9=xf;xH|z1&7&+DbowoL*m)bi_WWIu1m`Se$zfqdtYCb4C}PR zV4i?V?1Y@+%fXsgT&dZP+=mr$ER}3hYbk;yx}}_jsmYG^V4<+&+QoFQdyPY8XwZ_^ zLz58UEeo{81i1kM3%0V7wmM&HEdQNJS?LfQTR8MMPt))Z z2pYey~wF%R5%s7&sV56QdZwG?=iIh3ZkAQ z6Mei~)ls#a?NEV!&>I|A9AU+? zYVq^kN6uI{7UBD8H_I2);dZB3Bk)>`1-mJ_U2fJ@BW+tMD5G_h-ah4F?d@gP7ZnCrJWe~&>b4LcFw@foKwNF|v zj0a3y?C@{E=dKk3NB{pgr%t8`?LYYWKYvh3%&zYrN+GEEfTZAe4*EqOycp}jydEyR z&XW$k>%82~_e_I+Io}?2ufq`+&^a0%y6#5-p~Re0n+cD=|BiaS!Qm{WBzP7( z%VE1FBkgl2OF5rVmTIVyV<)=NT93>2MXnI91= zjJX1iJCn&2$#TS`{1_oebUOHQy)WK;UP<^ar<#8o&**n&2%f{jt4WNaVNSaZg^TO6 zU55o+`2&aD$+}{cbwzo3J7HKWG6^6dsx@(@_6zEl;Cao1EA?vt=-M%K&e90!VzJ51 zVZpVa?zQ#eUNuL!03Y9s*A?YxPDXA;iO5 zrQrRN;no#OTwkgtVt(7`ax$;$aTZrXXc&2)m7VQCZfbkJH&egZ?BRBYaZtBOv2YIf zZG-VZ()wbHSBu-t`ThZr^3!hk?p+a-ho0L%;^!wJNrB)U&;dWt^59vcCepAulzzBa z?@)B(hb=U``IT!IV|kXDS}*9=beW!Cvl4rhO`s9K>-978@ zgZCyH6JoI!20ze;jvpy=>a9Gy8^#ld_F2~_<>Wg?Ko|K_9<+4Bs*vhcf@MfK(WK`@srUOK!eNf2cDdAX)YI)n~vqy zdjh(?8*d;Z6MPE@B`y(BzOL$WVNuCwOLKeJ>&2Jgy`NxZhPdl#d0ws8V=y=`Ut^`IEMif+s22 z%(&rOz_cQ};aEZU9KmHM6I|O)v7ggwPt@~wRc2~;*V4t_tlpat?AOo>SP!E7vOL`R zXaXnmcr`+JN3aa#dn|*B)K8>0aMsMAoaKL@5;;?49^Y*Io%y3CVhJaCZ7>X{iy0S8E*Qoy+ zeS5MvxkC|(l1k<_&$V&x=5@ij(Xs7)aSZB^EEsdDLK*Vmu7dKL=S+JOrY7dM&+l4){8j2(Xej0zHYzSIRZgKowQB1n7K85a zI3Sr!eZmxw)SFpX@%s^2QJL~>MccHprJgW%zOCZ&x_5Ug^~FZd_V-6Acwd}EOHVKV z_HAU7WaXEgxth}V#Ud6N*N<53pz^x;&A;t@f?NAAw{cA;p8i1)s|C{FTC4?(I%ki( z&EBX6v~T!g6$ZbZ2;cN?gitY&!I(;GSI|Am#o0X1Ce-yk$jzxYRRf+Gl5m-1ObEU7 zemj8X6G2#O@ch23vvbSsGw9QrVg4I5zzmq!c`<~PNrjZ@zh2UC@Gc5N0H$cOn1?XH 
zczwE^O}S}e<^!G16o(A>DV}{0H9lNyy7okoDM~ve8m=-|_qYJ>r{w4Ugv^P|%QBy7 z#w`)846S5vPSHPkUxYsAYHx4nBB!~tnyY>TA9^U)oYMff9rHlvKz@uFsn-!`dg240 zVaM`}6R~e1A-b;HFW|!K``u;PE6vLMP)cE)i!%afpVMYto9T*o?d7HOK%R+f&;{=K z6{t%?ruTN#FG-jqc~jyEb-V`&J%MhoDkjpgs?B^EobG-syRae8n@CC3!Hb5W^Lkq6 zha3R`nF$7}k~BC_+!Upeq>^NItU21mrVJ{+828qv!;Qx`Sc-e@~0kaaX73XaytpNgWAjr9ZueHL5 zki3x`=ABU%m=DfXyr-nN47|v`ZCy!)dq!u=u}m88n!X~O>kG6xLIvE5tQRXq{=T>AeZ&+->`1@)oTC#{aat0;oI+HylH>!9VT-L-nS5) zhZH_Ldx0-H{;=g%DGMhdEOf^FuW5XCxnE!6R|5;|E$j19YLpcqrhkBXND@Ry)o!AL zsD!%LckqP4WO+S|@l9jK-+k|z$3ywgZ=VEL218Mm7*X&GAyFkG_9&0EqG%-d zDhL$beze&druJaS{z|fqr#4h!JTz&Ks`ME87h#Cx5wFf|Pfb~QMvL_@zTy6C)wNKY z0RADN`xdVa4iQihqaGujNL7e9l!_FcuA-2`XG}1AWL1Z(0)G_@gb~aLb}}pz3O_R^ z_x^UFAx$4&^u>@e_K0fcu2(>dC2`7rYA+=fNUj1a*Fmt_ z#pttaYMtPDT)6x^qVMBH(y=$4S$g~f7tTS>;|0J1IcgC3dpHg zSQG_(GoDBKo#~fN&ChPl;l^6+{%eW zG`yeSi_LGW@u-~Kwn6>p2OC8Qu012H5~7#a<|O*q^r*OIarC!q?_9Z6jT(_Dg}sG{ zjs!h=&_aXLLjm#|(MRNtpZWmE*h~1SXc8b2x>kZSCA|MI>;LAh|ECYqAPU8+@_2E&m8MpnCRU}6*v>pT_UrNpq~__@&C}RDok53CS=bTvavFu8>xr_ zJU-JLl8^?;&dLgX7S`jJ>or}L0<#1jIs?fAnifku3*G9b@%bt%=6Pb8R3gM9xjj-S zQObjKHvV;g^T&UZg;qD?{}C^Z2#B7d=ShhuwZ8k@oDAT{EDSQ@9+2+D_97GU1G&s> z&nQRf$MUTW>OHQ)`cnIzMWdmi{jyn=D$s7I9xqhu3m;g42N2S_;C^;l8QX#rm@Q`` z$x~%P-=ZRT^?tk-L?55H+PtCj@^ zF-IZ3gO5$<0xux0_XMmu^R9t`GkFho zz1uxw?{X^(T!xLBDzeY6^cE`b*?RnJ(2j74a9*~xwQa16-lcDwRxZck5g%|Bs2tPn zPHql$PXcU79j|2@GMhxXCdUPsxN0CKF%1}KReYu^O;rf2aV&s^7zM{nASYsUe|vQd z(g1w`>@fl;8jpzl*yQk6p<0n#Z}f}x>N%?zQ2KfW4jT>9H4H(;9<4A|ssmwt zBmmrfpz*B3Pt+hI#~`FqW$?>n=GTvdprywV(BoiC3g`!xmRnnbPr=Ydi4BPm;jT!p16KLinMfmTQ!BMC_9 z8jzQj;>K6UeXJ8js7D*JV6s-TSOh_q(K0>pFjAmy$BWdJej9-cGg|2kc~@zoe8wnr zHm*`X1ZK)Lz&5WxTDRO)ckxwPFX;qOqv(T~RBc}D=VZRJc)imhi&~*-l#s`Dey~4p zK+o^^($`cZo8(&z1tliC-#A)96a#o6y#8_yW#yDme?>bFO>u%Oj=Nvi5vHR^%(+N91cKtHy+u_b$_+K?k zz~xx|{-Wg}4h{lz-1fQat~Qd50M#PFo}%aP7Qx})N2WTZq6JjD^u~F#K!x72A~>8o zUvp3>O9=XP8nRCV*l@m%lr<)$kXBLJ-}Tc|9$$8YE|ZV4|G|&QZ_m}Uj+!nO$9Mt| z_DQKvsRm($mbm2S2Ma}B$`dJL<8q2;9EAChoVpu#D(_f^V 
zH&g}pJKGv&>K)9_6!{%426;|@2Ifl&^nELs3c85F08gOy%is3(LrsZSn#B%k6pv!G z9Q=JQ#US{Y0e(BCr)iFt+j;45tg*8o9>+0DFC`&iGQn9GPK`jlL3KN9N1UpS;3}HE z$?3|GUBexIHRN^w930hh$57F#b08sKva_=rds+{s@Qnh!s_g{8-0QW!L6p_vG%9(D zaiHcDZJpi?@JG4|Pz(2?+~{@%9!$d!@e-iob12j7v)T>W7>grTT&1V3!?CcjA)c$e)O~78)AIixPisUF_1p+Q#`eAaI&J(%H{dmt5W2lC;Te->io#xS5u+r;02k?}r}i#@o9r>uJeLlS z&B&liJqN{3bF{_F%LPc$bwHM5z!{WtcWU3L z>wjLAj(e!6s2Gjr$evSZd`gFhmH=xr><-S!$>|Zin19aN2=g5;)zw`E5L`$D0C6a= zjra?0eUKqJpkczJdD&87qXp?K5?ZQRd!ZiEv{V!IvKvjOTAnN+-@;<_e}4-R7;EKI zc~XNt_KSMKKyj%xLu4%XOznn)gX5Ik+bPiI9I;QTk?=@}n*&?(Z~@3yv?mx74tQfG za<;z)A1FRCD`YoIi!#%Xa>T4B*;@Xvm*E!JSh-T)ObdKz4jK)Y<*X`jQ0a{R#42Z^ z9=&!uw_wn1ZalaER9S8yiF+O3kI307^Idia`R`-QOVG=+U;|V$C{7g+RhO>EDb46ZO}z23`y=bW#uAb&bg&} zD55H&BP2Sl4G2es|O)A1h7tx(!3RjMH4Cnmh-R zU#pi=cP@G(BIf|{N)pimH*M`a0dw3TMwy1_9|H=;+ucV@NSJrCT*LVO=S#r(D$A?G0l1p`{)n)?i z>BRm!gwp+Odq>Cm1@)D>wnJP=2L`X>-UkAADq30z&?s}&I!l|*le!R9 zpNijt=c?3!V+TgqQh*@Li83_NSB@GEo5#Kw8s@lizXH^RhEo8>vvQrQkeH?C4zM<8 zO_8_ML!&2vnZzjxT}e!qXcz4hWPXYuAOGkqH*_=1~VD*t?+>I25%(*r%oz z`kRRYBZHe5^(}s@2u=IdbB3UE9)t9%9N-A>5bSD&-}(1taQA)(MV7aS9RI&Tn(&yG zL{M@85Tg>SArMLI15yxVB*_pVwuN_&E5n{oDY`+vRHI|&BA~WY2qtp?!c-;A*JfuT zI>FKuE)vg}K161}=D_AY`Zu_C`awgaDwQSX+R*Um;3?j!RI0*IK(<+%T+c?sS+^uT zz=@la4rbDA*2+~(Wn^tSjRq;+f-mA~9Ny|UsUL^Oh%@q1pDSboLaOUbt2{BE`-U`3 zNnT#St`QkUUqK<19Cv%ZuEMeFsMF5P%0}Q}KJi~u0t%bO>p}mn;=RR-cV#^)(nQ`1 ze`Wg%#L+rQNj`qIRtmne(XtZA#7BwN=P*1kU7(#IFts=|-4!L6_D^I@u+r7@HXX0Y z1VNwlzrt9qN=}7mS>r4&G30hfw?$2P7}ssOJ;Jy@R3F(vcmzF|MCH`E zoIcH-qCCaKrq2x*wQfp`)}P1KY)A}#6_>q{`W0UeR- zRdr#n_x#131*syQY=3a&RDY-a4eqGH&)@ID1EZ6^o6R`QVkL$gnR2b3@>$Nmc}^`T zhdX9>kYb9?L?29#ANJo1{_21HfUt8u@uhd)mm38yPmKig=c9RGoMxLg{uP!x2LXcU zS@gX4h%*J7mt#hHo1M4@%5}gjsM>~&9(q!9L|cR=?j+2Z=XJOJ%&~g9k-}s7taw`- zbIrGON_I-=P?vXsSmcbrT8`B9ypjY^F?R(t!`0({&Y>OxbuDUTWo44r;1DHpn}6Q} zHy21CGTYeLz=v^pP>bL-jly{u(Pb@4dXMwlEyy>(L*yD|pM2mJ-Y7^w$jZn>0u3|m z1|Qon{!LZm`O1!=Js{ap=p}r;_1tvwXQJZAGk6faBk&14OAYT43yUCV>vFSS^TT8$ zi=-K+mpzf=EWSFLHjl|D49#<(V`xc%=Uto!3eotEbck24AX>Pber 
zQF_~u(xdXYyU+o@XH4|ykF9of&7`8Cb_?;}BeBa3nqrphls09#!a+HO(|Q7*K|KZr z2F7!MIg9lolad%kMMXJ0uI)jVJse#yXnOhsK>|+3H$v8G1IaW1)PRbMf=7NOjaoeK za)9y4NU;mB@y*YVHHwn?fJN)bJ6@mfzvBGdAL;GQ6g5?6XDql0PO$1vU@WN6DWx&N zfzO?o2-l#%u*y5_k(Esjm45-HoMq&JxRj@?4^9I+pa5@dGlh}9{ti%d!CU`$bGcy>M^{1wHKyH;aE3k>;1xLj6vZ zzsx-FO0)owewQT?qNVP%?DSUhZs7Hj$@G!PzoH)57-j6W{^s>Uj@n2*4ahWH{P;YO z^pB-igTKNwxf`8;(S%696tG_oi|I1ESyui<>0r4^pLt-bpi8Kl8|Q(|22JLsFMq{~ zbQ3BK3cNHBIAzff3oi?Wd9eBji*JKD)l#r6r+WM)i zoZM&&p9>20T8eY9NTr5<8A`B|KgHx<%g(DM_mek#&ae#CVokMx9~DZD&a?9uQ}2E9dOl-fYOB@P zSv4U0PXpl8jJMyJG_n>u_Lg1W5*#?=3pni1_I+m_I};DWGTlr9K1u%c*<=3p{>!Qu zTKSy1ro20??`4B$s~3C&Q!pVqB|lb_BgI@mij~{^i7<1D*_ricV(|6f8dr}KC zO-qtX5}489#Dlg}D%hp#nSCy-B1)6O@bOTbv&9p=HY7Q-RA8k)CjH;m_W$%j7sm1? zhuxql#4EmSY~e!!zi}<{9_JNtJI~mdiC!7DN>w8ISpK1bD__)-);O1M0=6R%VsO3# zAG2Bz@?1gY?gcphX5;zPAZs^r#RnAOOHi))<1K*Z`vK0$FOM6?8E_=+cPHOOI81|e z2MH;u9icp>^zT3>%a!Uh(Qkl6iB&4O~LGXI5`Q~8EfJH*pFqHNtJv!qPCthrNdW#~fDxg)p{rzPI1BmD3 zAdL>DK9d{|mLjU~Z(#&pUS4nJHU`{wqbn=9;GF`5@-+{ijeRDt(HMHW24tDR3DxKp zFwB3b7Q8#h{|waGbkHxX^*6vkBrwo{-z(4jZIS93T3Kb}P}D08j^mNoFSt+^Zm&+@ zgB7SDjNrG%0twOsEL6CLV8m>>T)`N4`v5F;2l$QGCV{bNMKjC&=moV(|IH=5z?IZ`7-y8;Ap`0{> zc)@=_9hu8y0v>efiY+jGj=_$BDOqC>jth<-e67bF7_LBOJ)0>v2ns~>-vLIZ+T{~s z4v7|hL7xPzCf6$H&0d8gW^-(K-*mZw(av~*W2n>r21Khx1#m)gJQQOZBIPA-fcf(w zjXb7ht;VGPMB^(yLsr%iNcBa6Ws>P~4Sg)%r)sV6)ggs+A#Kp*vgei;{Nh1$;U*n! 
z_mTt^<^c{_Ksc;?1yVLq0NAM%wIH4{Vg4J6;j|^l%tkdtym3Q9kz{(UROH{#?dAI| zIt|AXj@{Ag=+m6VR^JL@f|0+9kyxh_s1k+Mx8w_@y2377nL#;Vn}Y;0yjTY6;V+9B z4nFrwSfZJkQdB$;fdHPzyPRy8I)m{WUSDO+%nAel0id3xA<3IpBbO8tf8g8EN_I2oMAgQ|LovxcE_3vSV-H|9d@Ka7 z;%me=o>C^~i>n6sr|w$54hZEM5vw!YUU3X{0TzLhRLb{X@v+t550R3ywzW~?l9?Tp zS^QH69^h&u0tFUyIAbtt)EG$>^vZ?|Njc{W>Jt?7WU}d8^T~c4Q-NU{PW|CaF>pC> zg#JH$u=)t~9J_j|pSE*oK9R@!==_Q>{_O(|xFTO9U#6S({ePB2(*I&J;&&augD6X( zN^?$?f`M#!G+0#|!3u#BN>DGXgCyV)5I&m~|D3!$c@P!K0p`6eaC;JgyNJ)MsR~jF zId+?a?^--P;Moc)~YZOh7dhb}zc03~;!fniObMK9> zzJIhTH;_2a(h*KA9w_rR^1^g#C0k`J zdzDqL-m9?|b9|CzS65E%D+&aX%x7G9#yazhwe#)GCp{^t`q}MG>(7}fiQCl{{lj%0 zzlV(z%$z9{WZ%S5L4{9p)2hE{tm`Cw)wc^MzcS@&=?hVC%JqMYAMeO5HOv0M@^Cpn zhGPr13M<94vi89yIY!{F;q)4)ppuq3V6r4~7<~rXj^=Jp-h*uqhXdPFmhvVKffG8kGThGPErx7p@YzOW}uowkTir~L`zn9|iHuVnF zFf$_GGT`{^koTLe+<+(qSV|7P_jJU?#Z`?|Oy+sh!Q%ezKVSo0RpSzi6$Gi=+4ng6 z#ABnQTtWZ`&Efls`aLc%xdB~?JM=sZ=&Q-|NF4j{58^kpZ=2m+EFqxEik)%M(b1Vf zznm9*D9>Uvbmnpc$sEh)b~N1FYIB@o^Au2LSjzd=@E-ry_JDc5%L%Saef&>K-7B)p z>W1tr0uBZ;aL|QxK9@)2Z{J`Zu7hQUAHdHu_Rpt>;V34si3WT++nKPN@5akRR#0rm zX}3RG`Mu+kQ}b^Roc7R}AgsK+{9N-HVDHx#v*x97@L1DhEV6t{3gt)ewDdw?;pa+W ziS7h=ypDkL%V2;(%l)CS1d7#Vl^+4yk$ zZ~{zfoSQv{C!dOUI-k3DT~T9#PHWj6{N8Q#Lgd_hz^X4mCRi~pt~)J?tA*D__C2dr zjCTvc1@^wuN%`srA4IJ6O4Yt*sy-p)C!HtaLVlZMlw!Gws5fl(j}|Er?-4)eDx0(O zV{6K5x+a0#*G_$i>G&EHHB>44yU&%jV(kJk|C(Z+rg0G6?Q5TdD`__+!uQ~eyfBiB zeJUsc+Erx^O+YI#yuM5}vKkqr5mM6!ZU%6;*kFj=!vxO<_t}@eUe~smOjhA-+ zTv1M)?KO5+ib6E(S0<>!(FO~+xcUs(hvE;{Ou2c=V|^jF&%&bjUX>6x`7+d>$pTWi31n|pfNr$w`&rQhdUTR#(NVRIkY=+D4tRx%ZpOpLBI+4AY(6Sx{` z&ke;?l+c zbOQ)?8G++K68MRX?UOfLD|f7RM@-2Ml-6yKz}FpAMTEd%=CGXQf=g=P1(<6!IPR;K zRawluJE-4faoI>TK6New6E86c5%ni=%aia}yo4w5D^16w0MIZ!r2gfw``+d9Z_rue z^g)@~1S32ZYXlN4prjBG1cAvq3gp*&ZVp>O*Kgx+IX2v$Dowxl1a*`aA?94iyEDPw z<)o%LnnrnL^SH)f04ahZW|z*JJ}?&l-PVi@eOPa{>;E7j*+hzYR$#JMmj@wI2IIb9)(E*!6ikbn6CV%*2zG z<>vK}8bh;aiNtdhqS+@_zz=aem}dYv|GUJUg`6B(hIh0@+7sZz3D)2rSO8ZJ9XMe) z3GfiM0U0PlY9KjieK21)+Tdgj)aNi;t5ANsqr~&`?YWvmz8hzx!%dLB&XfO;Fk`9k 
zUs*S`eL?c~haVdd)N$38z=COVMG~#ET{i?IzdpR$7e@8?@nbV0idMQeNirZ<`2)0k z5(rs@F6j8{AkGjC=d5!y`d*OkRcDYHHk;st+hViexA{5;OR5HCTRw064<6Hc^;DVo z=Zzrhh)6dI0=u6Bz@jr=Bp@LsQx`|mmbC`Uw-S5{_raM_>X@eV9kru$CM=%^A#jo| zq3{g*R(3_Q5{M;3#njTOJMVuVFw!efw}XnTe7^FHHEnx8wD1#Z1t3}GL>;UBz!3>B z0=gRR!!-Gl@>mS->zr?u529cfRFuekrobwf#BOoG_%?h z07Ot*+S(q$6H+I8)Y)PQATZS;;Xe*AJZC1Qwv`uHL1I)ohnAMC zR)@HD%x{y|B{ZdeRsQ4`@x2w>PiTPvCWVUmT(w?;*d}q?P*suaTeK~KJm65lyd(_q zs*Fa!Q{ZFK#whY`xkx=ZjMDRH{7olc9{qibEjH#o1(W#~ictQitS|Zqs>C;sn*@-Pg10<6^)Xcacz7BK2yN~Zz|sLm4dXH$mFQRoRW%-9 zSQ(I?TMs0)8m$X8@cAzVdi{M+lVK|O zepVnQPz`n9w5Un|t4}>(C6Zcv4Mcn6LE60zc!sn9X8|~q?KM|rP7n7tZvj^sTWtfo zVuk=~!Et=u`K-CxskX~{9K30{VUH}O^V(#ElD<|1{UX-}CmW|oR#q1N@i~e74u$tp zz;*rsT!s22xEFH+_6%eJdw=mlE_N6e0C$bIZ}?@u?pDTG-tK>>5TW64`N}%BZ-=gr z_lQ8m=Mm`}{5y@>pMW46+fr1b8|5gC37L^;efM~e#l?gD>?_wb;hzvpr=(bnA_kA$ z!72lEiifWzolly;y388rQnNNLx#;iHb zo|V=LCg!3tKLmgEr|^$S`K=E&3&uNm!A=K(FIZ$aCN8BpZ~MP$d+%tj`~QDjAtPI| z_sT1q%HAtPgd(zcl97yztn93;jL3?L%Ff6pvZW{^dzGCczW0Z&_viEe{?7OO&i8yi zpYQd@bvm8vGM~@;xUG8y_&(J^_H?d?m6KB$o(OILNXjggP@4tp6WOkh2rYn0L7$($ z-`Si>Nfy0HvXuxeaQoZIQp+P8NNGT1F!$y{o~vmF5B$2=QOE1a(Kk=(7%Q_+xCb)> z5E~K%goFc5g&TD+h`am3M}O_HYV-?aFro}(WGrKq&Lc~fdQ_5fL<+(6+aIsfjRQ!C zZ$X!44P4lfLJtYMil4as#spjteS6R<4(M1O+oPk^#aODEBAU#SECB3E z)dqV_KJ)k&^ho#iZ1pFaitz<9ujEDf!bZkj?l3%H$uLQ!R4$~%m2$y36D@eHx!61M zd(A)x*wO55egAqr#O&5Z4UNI2b$DDH%Edn(eYa}ko#hP_85%70EOhT>yc&`dGHSm@6u1L<~#Eg)wQcQggz81`~{e`Whl?Jv?ou+NGd_7e9wigFgr6USm zJtTU%vt&(+^N)?H8h~xH_kQL;kIc!_ykhKhu-_I%OE?Y{W}D-Ao^>JeR+IG1q_@eL z7ge&mr8Y1=$jD)+b^ZNlcZ^{EPyX{?Wy;Qd>|T~WxyQvn)~4xZ`o66|M|Sxat8ZGG z(mB#JN5H~d{06>0(|y%JHSqTPP09Sp^^&bj{wSr)n8a8%`5hY1xv3u;3C*f8YE&9e zY-z-D!yCFxNf-}IiVU9FeEwe7J7w6Mm`d@eorU9 zLXtTZ$F3UvA&BPI{O-CeaFMPC_pSXWX>eAWX+j(28z~Gpx~rt{VV&@=PA|=@&bkxx zJUf|?>s0h18BOGC&sVnz2uaE;yQdf8EOno`Dg&*eO2Yi;y*>u^ps?uhEB!PE978ce 
zk!5A>NG_wBr!gutXYS1u6(+`-oOt#@2^M>JOiX{pMt1gGPg;{}Q{}I#pm}%tmt3u8KR}uY}*wr>g`(*jcB#HrbVU=u3iL=g3-?n{A&Yg1wpRRxXj@RZ}__*$vL%4rvz`B5CnTBJbz8DLHejPDHV(;ZA49&iIwavE5R&W?^)Bu-U;h4*^p ztr~5>s!&K1s6bdjG#U@nxLT5c>C1KrgsxLiQnEwHR}$JPK;bis)Tfm;LJmV`GUl9* z0QU|6bZ3E}a9#N%#B-yz3OuLCQad~5+%VkP@sLRxbIxI?&j~8u>xUAu;X;cW9~<#F zTd6=)=PwEVV^dAPY)dY}Zc;}~+8q-^W-+dB6$ewY$z8_IPZ#YIz50Hd)75$EiX`Of zRFe!mw{Vu6%bGXM@bL!6y6fPj3!Nxk3q3{Zu;l0|frREd;;x}V3_6kdv48||w9;7s zA@HG$fM%V66q;Cogw$bl6(tGWgeZ%v^7!Op5G;kZWj!!=f=sSuq?<=2Fwdd+S}U$PDG7*D9DoxSHSpp3wv>K5mg10 zU3a8A%>z57?M^32%tbA&3PnQ>PEKoIpazrg_-ti$Wg##fNhdi;BeSZAHV5W9(_&-E zs60TBoPd_6`ojYs0uquWuoC}@vi*UiYtWRsAiSkOzZHv(WbYxU2;hfS>{3|&Qa3Lm z5u>k8!YGmJB6;_FBI-u*NZ3rf9zVW7(ORxsr0 zj`1i=%I{_u(LVW2m-7k~Y)ezvWRE``B1RjYA4di?O-((QUqBakfMlc0*UCfc=Vu_` zQ;Sw2*BSou>XGcdepgiSkL%$!Pv2cFA9)XJ5CdEHC5JD>VF-W8< zv)u;0NU!^i##akN{?dm_Ng&F|M({VGK%(_5RG3(0{F?q|k-i=d$P zIr0^7kUsgjwLSZ`Bl^+r&q9Y%hfA@NAp}HwSCIV2P(bA%jwKJ1A}I#Gt##cRT^*=q z_Umj*o)_*Ac`tuRJe9zc&ABmeeU&VHpS}Nm&>RT~qmQtDL^APfn8WF~DT4QHUxq_P zfJxT>4ou-H>fQ#!Z=Z44%ow*i6NaaTjwO?`6D$${-lIFH%eTh{s5^&EigVT=QKu5c zz*7z(>HZ^W(9qidur8B-IjggZ$bUaywt@HLhjQ}8!C7JH=rrf7&fd$bXK|pibps=v zEL9K+^0qNYL$DqzfN208sf}<(_W0OX78It32I@+4I{I$HfGQAa`jqgJH2`>x#)CE3 z*deEcz2!NP;4Xd;$Bv^t+ErXlRN)K zMz%{P!F0he{?op#oiKm$XJz4Wp?eR1pXX0*bi+L)p7Hv=2wQPFxedrj6054jkV2o6 zjZFb%5z@-~)kc(q98dcM5e>`w`nv09>*P*=7Vb2hcN*=W)RxL_4}x;ik)s@G1$Ka% z3C6o$SP^32zNibhMi3sUI+V#=0BAS*6iGk7VnZbgpoZliX#AN5*BuXxFPGQDeL*RH z7LmW4JUkcS-Q+rJ@VxI&Z5$vXSN!d{FS@?RRCH_V9UL5B@-M-2X`G@l^uF#^W$Iwr zv&quaZUxzHhW10f3!+Q&^UksjQdA-`*mIl85=U?6X?#3M9V<)TKLFP_5r1jTYd-^I>g}B&Lr#4pR%5VV9dB zyW+)j<7!>R=z(Co_{KZ{F&qF}hs<7guCr$LHUAHacij#5(<{VJ@$howD_<-*YtzN4 z>@X@+uk07u7AbSGC}XM`dyV%cWC+y*tW2y}E6~hxpkz1w`llw4 z-!jaOsPasL=14h=g6#??9ozq7)q4=x(Mxr@i zEpFc~eVNR6-2g1ByePpmR2~c~dXOGLjfXFX{{hB6hh0zy>yWa4W9-v<1!lHU1B!PE zpI}#T+gZmM?z!;QJ)t5QhwM2?l?nrtUSs0<@Qq?eobigo$gCxapL)*%qJi^!SMH|p3ol#ZP-qdn~PK; 
zB6Yw(tc)z3MClcm3kUq5yy|uQBWFsm?O*WRMe$t0JLd&_DcdZBlL5Jw2QFf|P`rUf^VI;$+q?m#U#i^-ys3}9Z2Zi8 zd<#565^w$iap=9i<3pU1=#lO&wgb6a+L`_J1lYYVB%jey7-PvT z53Y3TG4D%?QpfSNJ^6J$NQ2wnYw|uG7;j2J!pjPMDgMvi&*fN2s2d|rlVI{6)fg4$ zrn{%WumjtKkMUCz&Z3u%AkUtGiA4WcDt#ZhOj9ZhCtnOJ`+e74?f6v5&>9POdZyYTI#{gC*&rrrIf4e}4i3 zk_tlp!Pl=eWF_FKD-(|lFX54DE-XC5V>mD*wJ@rj!X9}MK=0&sA%eH*gv zMF-TMo=B#Qiz@&<$!0R8eJYP`$x7o>SFZx9Rbr3I1FbhcvaTBuzNZpOe(Ix9}ExJlaaUl zHS`cP76b0;1$kh1yz4>s*S{APRGoVcm&vf{h`pJ%-K02 zyIH38 zg6@?ooT9u#-PHvz{MQK#n99Z`HpKC=C>fNq49Biv;R)Sn>5w8+x2ntXSE1L|l8&=^RqRO7LV0gc}y*b_eagz*|z zT}C;^UVw+?0gveMyaOC>R^R|bq0Pm8NMIDjC}{wgP2L#NppQff;@IE1bpz~b-TURm zxm9ef+TH16)U<6a)c1ps-rwfWtjLz3UDm@OslvH++TWP*6^##S@yq;+yp?*4cJL11 zDu{w9r~~NO8OS6-Q*QBP$I-H9b=$mar$O+39Y#Em{1Zrd9tR%c#c{HCSE^iR)sYUW z77G8z5&J-7P>?>ere*LiV8l~Gk=S{>))3aa%;~GgdT39d&d=OP7E&{J*}Nlj0GzPzTMBY`r3 zM+9p_u#Wc`K2W7#bhxZf3<uSe6Prt%Sr*vY^n83;$@ez`H=lYMti14$;GFE)6 zGWf_?_P{(g1Bt>Q?C|(8{-Q2%N?u98A>c#`1nB6HBheBiQ4@r8>mJ($1wx1ZRg=6u zXAn~OC@Wr5umTU;5K4`TUmoR(`>Ixoy}4UoKi$S_&U*Nsr*?v zzmU(RldDZ*dy9f^YgWm@CP%bx=0p(uo#BrB zZ6yX-{@F_(f(+=SfjdRZ-eB<~wN*YFtj87>7G=%0c4}du*Vq+Ji*#ih{C$85jL#K~eVMw%WB=M$6N* z_P4^zgIziYrv6QoG^>7{Gn&zy=2idtD8pH?t(%g5gJ$AWW2dTE?`m=lX51=Cy%@Up zr6M~&j>kq;_LJ>~NU#&wQJc(6^T)mvUHX6F8EZUf1Q&GhE>$I1B|P``BQ*;B$>IBw zKPI2#JJ)jpn^2MNLrLlxLN>Lm?1O)AE+~^UjGP@!^>jC?UGr{!snS6sAlRNzFKNPfzCs6ygzb zlz~BZRa(gp(#Y+BuS4%FGJ?9q#l-=Bc8UNDtZsn%8O?Vwz!Z)US^r}H!-Els^7jHYk#E)Qz`(XH4p@Ma=K9}%5whc z_qXomTU+MOIbCVoAC;HC55k1Ar1^RI7tL5Qvr;$lZLD&yd-e6&nWrb&eD^71Nu*5u zPP9B-ROtSEI|me&r^2cWSc%%B=?fG{EnEPI5q9~eqBT2OnZ`3Y1$NqY;SEw3T2i}Bwf^As8s2^T zy+#89^`)H-!Lbyut$rM;jj4LIdTw)*92!vhsBDiJB21$CRi3`{@BhJ3p z{kVcZ9eFBLWhviMT49-X^cc1|69+CM5yv~ay{C0Z#Y9&MqP{pDWn9;Wb|qWMi% z;-pS{^qTd=N-{dgID;njs2YDS&Bs63Y;R@6>?~xjo10sm)4U}`<12hoJ&_m@w+xSY8d?3m5x!<83Z4yizB1{p{a91I z?eq8-uK09X#}e>}?O3;{>8gBJrCd9wJ9vd}&*E<~MB0@xk1KM&HPyAt6m6PZ13eY` zk2J&zwg2rucplp~IiZbbIrT&MZa(p(?S{L|X^`jsrXrVokqTC-e5XMh?AG+sX9hP~ z-#*dnjE}dUyjkEG%FV_$6TdM_{2_>6+-oCIMs`MZF!M>)_CB;>jx%S}6ciMryMnOn 
zMVr*0&PHE)N^PnI`nH+)J|pjkgu=KE-xq-)>j0au6}Zmm{D9uegV$R3h0(cRzn4p9 zUw82*fI6}RRUD)s0`SWqWYXmBS572kgv$#@=o;sXQD0(D(E!AR^?Xmp5yc73&qa_=`!Ni;@+0KNgG z8G5r!Pfy>Wul1Jwvy}iKz9})*vuDqc--0TW1d@o7V25;kBpmQtK))!_KzrJ=6!wm7 zmHgb?ncX`NHs`j61AM1u|AtC>aZe~&0>`>7pfm;(fEA>KyoNxkXvMc@!#W_)Kxa(q zc$iojZuNQ_Jt}mVQ{)eJ3^Tu>o*BY{!}-n&zI|VnYkx7FhO7o&I>r(vy62XR!yoM4 zcmiSa>>AD*VN-F~E-<$C407$K^ynL)92(vEg3h3H8BSI|2OeM_o+6{|D3AC_i%}Ld z{|CoLP78#qc;YaYOhQf0Lvsd64D4<tv**rA;cICX2RA!%I~5hUj)okYLNJ zNo206NC3wgS=N>9vU#D*bmzk_0n>=G091_M*!|VEy!o`ionZNPD))BPJ3_ngrVd$# zT`_omO{Xr5#se?Is4tT|Q#GsxFs4qxaQM)0gtW?3^Zix^r=-M-R@@^D?rAFC_4#Q1|$(Zb3m z?eUgC#ntqZfPB{9OTydaH8<&_wVv9{%O_g{P~8jZ0p`Dbwk%c^e=OoH^TkIfmz1H0 zwCHD9r&)TNQo}R_%}oLo>>p$leJ)WA)|_0FruYt7jnkKFjQ!vo5Sq&=Y$j=3&O`}m zSz0Ir#>AS(iPryk0eEx5EgBdeQB|>5$WDkHaL}bD;L4q#u$y8Qa34!?) zX)IyHu!l=XXyYaUmkO9)5&?u)*VT8ZrDUostC15_n;WrU4L)cqs$isVa#8^1&7v<8>5ux`Eu3FlIqu#57w&-fb+2VT;=lgWr^V%6?nH0 z*E?_<^rtfb>m#DiVHe<>MDX5r0!U^B3j~_iqVXF3>^w$9Fowt!yk(LGGq6b~!FOD^ z`xXLia#5~fd@6tjyU0nf4|EF(9L@u)(gr$w!VnKj5nGh_2vv(I=Z zqsw~(!E7eaG&IyHy7b1lqds@*+M?^PF5NMTnELm!{B%=a_ZFT$T!~c!ZKL)6?)nCN z)kVU+-^p=6#F&TTCr*$u-2^bGwsPEC10o`_$Cbj`L5^=yaydJ&%xTO(Itq6B0o>$a z7#yW5fP!@eps)+Ij5t*$7|CHIMdz0?eL7-UF-$I*;D9Q!`vd)G7E(lXob6yIL1xF+aNfZ9n>gdu0*sa|DO!Lg}o2*CFkGP2(p(bLm2=DQ`nwx0I% zPwD#$E!CM`bF|wQIfVC*YPd0j*Xd}mRq6s);G`OIKEw>B2h1pR{yg(CWrr_SuPV%y z_7t$l@%qDd5!*I>dG+3hb0c=OilB2lcyZ(0PS$D z9%ddSxa)-7r2NA)x)dfIi={M)PI)m}B2sDiGnVuRIpA|~P)0CP0i;OgT!YoY~Dnc4Y`L$H>;W(5Ql~{yxA~Q4%#$RY9TwZAJ(!(wF<5h(qM3I5-3-FJ8Lz2>DBq6cC2&ik}^6im!l>+W?U0 zBEYyS*W!?y3&abvh{^06CS{i4b*^2996Dhops91+_zxgVSof`m6B6VQ(TR4*Z0vKi z<#s*B`>h6xYy3lqf)ihm+WVMr_aH5Fac8l;!7_QiCOpL<%PD!?erRpJfaaE^`B`ox zyRrY}Gb>Bw1FULSgYZw3I0@FaE*Jri65Wao_|QGUTcG6rp;uZ5MGSLd9meUJ8a~ib zpvby)gjOI`5bR}y^(ggOh6EgY@gSsM^7i(wLCpajw6p(Y{_(jXiUp*4lF7lvr4I7M zS?KKpg29&V?q!MmCPp{lKnc}Dpw7J+tI4C@_5M!gp*8U9OyhcA#yJ?>@GU%_KAjZf3*#NsG0b<(11hprXgYS}Sg6-;t z-!E&Ey8)Bfmn%L$3}FEiT^_O@dqfpJAj@n8vn+A*ukdZ)D>~85JQuQ;ur(!stbD14 
zdrcF`PhlV6gGH1AWRwR7wsk>(yTqra{`@6k0!9GvT3m1!`h1=@q)|pjapDHZ$^^QD z>KkUPn9ELH|1sO5JV8|@$=U|HUoQBP=q`XY@hI<{j_#dgHF$x&4t~v-tJBJn<>%#f zqTDE?XKLfSRw{>#Bhp#eA9bWxJvoqR9yx_)X3~dcYopO{gJzVvh@<++aad*7A67IE z%J^*#6%`c+1||03$CO$ZFY1s^iu6jcKa}1{X*-7BfEGtc>L@4X7*#v2?aU$-4_EJM z1U_DUVZ;f~v22N5i}Nt3LdQf7fE8PE`n$ROQUo_cZBS_WmRQ~% z&5rO^TM;+!g=q=G)RZ6Urud(<6rZ)zjUV<{Q<;(NKi&YIZ*BI`)Y8fQSvWXnG{6_+ zz?79;5})9^Anj{X&F`u)d<+_V{6t-Wn>0kCvGZdt*2j2hd^z^)OwGzQH#wE?$jD?! z`k?!z(D7QJ=XFU+wt!Uf`ObHdEXCNV_UCPeujS_DsjK0)HHvJ~T5#YeY_dpjg@%S2 zE_>wau#j>&7IXz*QEsw;dSgycE7vyvz4g?kJeA!$@M#rL-djadUUOI2*{R9ZaZ^KRmp0!f*J!78>>o<_ zM)z&odIfcr+H#IjRuOY?d8z-&tT+Bo*bTi+0R~ zXp<=6HP>dZ0(Lh;^%U|@9eQuTQ`v>W)!+?Bc%j`F7+B6Ri5xe}`~tj{hJ{5AdXj+q z%-a(~LVNUH14ccx2AW9>VD<4p{}4;wgM$pNP#-h^G&mRKs9+4^-YFhkB@0W-JN)CY zq?!O{^kk2X38`d|ia)M^oikT0HW=ptjrh~ThV2~5do!ha&y1CYzs!C3gtbS)D8s8? zoMNAq)x`;z6)Gdj5os3$c=It@mo5!^nRg}foJ~C;quA}^v(*83yuo?V=--{|)Bbj; z0X*HBJ`dSQ(bcZYmrH}G2t+(?*(40w8Hl@dV7QCuY_rB*bH5We7EZ+?;)0viIU!8-m8C{FD`Ag;wZ&@92tf~O7P(~iSMoXEEN z2Q5rrD*=QRcKpbT66Zq&lu%;npS%p9&Wf=_2FLD)@2uZMVx}!lGwTUW?2~5$j-QNt zHrOXmvzMBQ_d+yc%RcAHaZD(2Makt{9mQ1*^@nlU(K3mNgZFjwjTik5M8D;Ig*2xl zqk>D&4}hpp!ki|EWii)HS;1^P;*^PCezZO!R>D#Va+LVMe5T_f2AS{C5Eh>Zv_kQ& z4=@h;W-_4t#Go}|R^&!2D4mLb!fD5vfm$!OXgGcJ1~!G)V84}A*-9&h{ZiU{EB5Tm zDh2PIm1iT%=|BE6GU+3=bpWFVg5Py-zml=!;NeqYDmtv-qtlpuY*$5TM=Zfk;;ameI1Ja6`2(CiyX%GQ5uTp_Fu8MJqS zM!wPuu>Q4}%m-UrTe7~ZQw+$`4p01;&{0R2!`623bL$_VY$As;lVJ!PXV-8fUI}YJ z6~xG;BUx|}aGY?Zs|Cz2hfxJCfTu0JP||;cE%o3KGB(R3=$|;$!U49`HuLc z%BfBMj!P8mWapdJ_%0X{xu%Brl29AX zTsus4U>1}it?Bbg^Xf}B7oU2}k*D1Cl=!Nek(FJ&M{sAk9q-K+rMAxgt&DHfVSN+* z>01-HmQm(6yWz{H7Y8MhA|;&uO|fGpJnOmMuf)M2_Nhu}Eq40y+4~PaK#nB6;hAQY zCQ*D1(o&NRx)tHjJ-rx_@Wn+>+ssFBoPdQ^WY=8&9q zmY+wpfA?-su%2*j)NZkzXu`Crw)!u1>(yAVeV3VFy+Y$2SS%E7zQ)11hHH?LntA(f zjoQtdwZX|4hxU~A^``~Tr95xFh`yLEL(Z4i<;6@ray9(n#es^A`>b>4eUMNTu%@4r z<8%32_gJJwS^*|$F0cinwud0@EVL)Qq2W{~OBiBv(L+Qkro4uc0W zm1Cjtc0y5|MVHG#GphzChmmgY5jd$eKyIOD90P}sFnWb%Sr=ze=n=}$xd9g^s+Wq2 
zVK0J8yf^`iR2Sj_fD1~3s;Gk!Ki08wC(4gdhd zO7ZmZmcrH_OH4RFQ*ruP%zde^qUNf{XXvgJST1Sg5{~{Y2E%)p@|^s{?2ih8<_s=t zg(8itv-6*X4tWoY?Dk>*u>$)y5;!^1+^7X$;@O2#kP8X>AMCqH1(Kj^8GwY`sPiR( zZ|17=3^>O0Jp(~Bh1`PM_fQNrS~-^DzChj)uTh!W&->TV@v_#52~$|@a*hvE;$JtO z28PwK-Rf8`62s=T&z?9Ujn#|J8tohJMjTDzozk?a_PP?eMO#%CX^fk_N5pYPo^iD+ zV9*82;4i??K#05*q$0u%q>{oB(%{h0_n@5AKhty)|MIFk06vBCVyERq$6eYPAxY)j z{i>(7P)PZZDs&q*xS|J?!`rzBPF%IJpG!DV8{Kza(XI$5I}u?K5-iAPQd3q79>N;gC#py3 z6Ej)t+Brfn?EuJ8lyRevoo={qq2o9UEbh4LWDg;1dpva~dW|s0lE_=^DX3h{+-nn% zUyVWBRS6v|H{Y^3P8PNBiwI8I@Ij@H1CftCP2EP{TVct}cF(}2mGqqkvRj?qzvp`d zkCwaqvG&%V)N}IuxW49ee>5Wh^q$%nB0s@K%6OIKc)vOFD}q4k=GPkiF>!d0P3sA;z(3h+$UZ)57T}H)2rS@cy$vR4nsv#lL2y|OM zux3k(%*0^tv|pBnfvn8(^(O!nkjNS+Eab77yI&t~;qb%$YEq@aW9pQJGCtlN^VZ+J zYV>?&NfiO+blcK|k-pbn9ZZXC$sZis>+`9(_i1*Ze1`+`^YLkckE#jf7E3f+A0P7ojiR72)7$lu#R8mF(y5r% zt`p$DQj4Y}P&g17QA(ne(Yn7R?IgjoXgEBdAD=>Q4LTp){Led_HZ5uj-aSw8Q{)_s ziU0{eQgfPFs90q_#{ES>u&5KCk_qPE;8~n?biEq2XwQ)U?jP2VFKk^tF)h5m{@9uM zAk~rmrdu^IlBrE=Lds;vbeah+C7F|z)zD8R zUO8)K9q`#JG`1mG_Jj1FMm**Yz=qK%O$UC>GMp19G6}Q+S3o{p6b}tsc^A|oR&>9q zXP5!JKzr~QU*ghCVS!Thrlq}I1L{92*{)+wm5aX+a58@!xgQWaft;HtNTv*uA{;(G z=Yi>@>?aw*i2y)vtAvl}p_bs?St41uvoeKLIu66}%O$O;BGbg~A`<(UG1 z1)D`vM_~%cI?(C*-unk=>20Qd{NSdgog=gyT;1Gsv-i5Ww6)H!al7jIp|wrg^^V6` zDlZ;7-?h0v5FNhQzhPFVtebbu7s8^$BO>C#?F&b$-+&npot%PnaL`aaAZ1R1xd~D< zP<+H82OT8^1q$3t01DZ(nN)f!7W_G#sFY0sNeWBO6%N=FP?Qhi1KNUs*P%-Jcg)_I z{3uGpEE57?;c$#?djw%4U=nOvZ<$2j_X~*hz(6I5X2yG)mwCm zb(;hDRCw*Vk5q}_QN$&uLO3KdkKk$+aJ$eg`iRP)I3oSw)kQ^sj#eth`IjcDR zb7b#Kh_zW4Kst+(K`5A*UYdLULdQT*q`reqfYEJFW+?1;1zW~pB0=XqAX)m#^_Ypq zb59~@SGlKtbrYy+RydAmp(PAXt=+GjG{P`8M zN+zCQ%P;5q2AJGTQSs~1`$RN?a^Uy#)H)|B@fb^wett-7g50yvPi3CQllIuRj6WH^ zeQj{=^+sQjqaNS%-E_=NJcj8O>HW24qrBCjF`rvLD2f#Y01~R9vO^Ys*Y#91mi_?5 zOO-3;IFa^ij2)t5T1=Vj}BGs^Pzb8LyZ$-4pb2W0VLq<$g-D9z|=24*pp z$A+J4EEG0iVSq7D0`Vz;+b6S}XFBWryZmy$;hKZ^`=OgeaCJ*zU`xj8tM>N>2k|n~9t}d+=ncm}O9d^#nM1^1yTy zZ~nyBk3Si@jbU%UsY0_tYgwmU+%$wPqlkvhoB8G^mgrl(L%FrnV_ie%udE&Hb%fzO+T 
z_vrHagTNe^I&~{<8ls3gIDLrh#$ct2LvB-WquI1aJU1#=N6ub^2M~S$psuvrFpJLXYT>%*pA& zLhGzCr_Wchn9S8x0?VHN4ihcz>x%icd|C9j%&BOv>DnCfRC7?PZa}3hZnps^G6e+7 zGRf`xLcXeAUo>2F4KV^7ZI!(@&=h;I843<(_Q#=vv%$~A+*4MV{?&hU`6zDUOeSoMr&Ky#i zW}rR-*lUxwSMhvCq#N5{lUp^&m1M2WET#X#ONV6CiJ5lIRgjGI8E!g7)Y9Lol82n5 zXKWPX%2?*%MYe13xgGVl0jVmU8XD4j@loYE2ylB02;|6i7Wx&T!55g9g($t{^gPLU z`zFAPdc1+ZHs^~=-Gd!HCNq?_#e2dnGm0dS>hs1(^%J|`$soHR9}yN7xueks-Z~Tr zj1FLa#Ty=2HbeR|3OOO3;Mux!t{~sd*Sz4z4gct5e^N~dA-d!I23>WJe(Mc_dvKa2dCY2!FmB<@ISWN1A&dn$P@iWjA;T>Otm8L)27pthQ|14b- z1si1!6@sLV&XKUlH8(f+8k5Q?nxqe|`!xm8($YR-qkf`J4^gOgeR9p!tVkW@_+xP} z>Gk_k95pI?z)-AkG98NC&B1B;2(N86MEB(P)qmT<*DQm5c4=Q%<3z(DNh$(dfO z>~R4x*TbLP3n5fFZG;>OLN5!$cjPr&TU#R<`~&pOXvMUiKAP3NGFaA@4U5l55z!wg zWBdLqCCGz7qR8Csef99HVwR}E6xJ#ES&ly0!!&-Kzpr2r%1a>ISo_AwDcI31MA!K0 zX?vbU-Ub-nVwesTG5X?TiA~6kfKqJ~nqQ{{rKqT+T?c$V&uZ~D+`c&|-7v!q+MMhR z8`P~ZwlB_o#ZDBI)BiKuuA3gE-Zc^(=d!HzwW#;yl5K`47=EqgRgob2F2KRaU{vew9Gu-jjdKD~gr5Z)U3ZAuO_&_FqpLN@LtgoOI`qwQDKn#RE z*UT12%Zo)FKX#8R0z0dM@>0=3RZvK2Orx|^WQ}H1j#&8dAUJrTOzV>~6vzRXFYQ59 z@*RT9uF=Zes{(11{jLIsQo;KoOLoq$0$RovlX`*tab-iI1Vn_1{=T}O8nAu7e`5(+ z?ds?#J*a{lD@-DBnb4s3IcpmmH84cJ>J{Ys@IL7NTs{P9&p&W?VCV>1G+^3#v%wN9 zG+2njduOP*O!%|R+E3jSi#8hMsScr3*oE7$Ty-MBhXPR<_$H9U9K0IHc73#i zd?UmC3>XbRTFD6Cp^3f9{TlQpUbuVK$`a$|35|RfZr;B3-gW@&D-m`IN2#7=tn6yX z5&om(H4BP&QRv(WY!Z+S`33M3UAV#`GG7H)$I+Rbwc9_&Baz7IO~c#Vs+{NdEa&zI z`I9w%JbnhDm1}Hw=?#stYhSMf0i!6Rk>q<#L^PT%6HoreVBz*NTgDmghZn9{!Z(KY z4=-g)SX&|lJr7x!kP!jk`$%5kgi$IafL{g=S(T7NfET!fiic8Na5G%A9+&~RU;*hn z+h53H00qfxa{|5#IZO;eSc71vx_C6sCZ4^j6;d?4x&os-T04OUz@VnoCO%1nug&nI z56=1^ZJqaNa4YlvlTDVGt)-Rk2j*HCFrf2eskG@`A2sif4B@cdB+wUwt5 zDDp=AC>KC7O#S3VbQR%9dEY3xfUknz335++{d4cck!KTms?m-@hZh|oK<#-84?0lYg)ZuDV+IoTK0W!W1*mC{!TBf}Vz+zSulW1tOhj zoN$j7`S;tmQMNzD6I$TGVeISAgPnhg-xmnQa-(1OZ{Wq}0;T_wsP7hC0IC2^4fwR2+&=hR(LIc2Z+cpckO{pnwpyoJ*a zQF|GmNz-Aqf1kwX_@F0K_5up-?)J^#_^aT=I-G@~m?)rRLsrd4Kx{w#!=)PHaUy8qHA2u4P9I zV8;ng-5sK1NJ0DNMqXQv{g9Dn+m@*M#tG$VT%j=nN1MD9ZurmKoRogIv_I|ydG0o? 
zMX9Y~=j+~mTeUFk|84@eWwr`;6I*V;w*h|}%L-pZ?LK4rH#aTmI79GCa@GNdgrHvV z-;|HM4hswG^U7l^{R6e|V|5`T1USt?b9e+|&rXOf%W?S>WBDclGC8=l();do8~TDg z-=tS-XE`D|dU)awXzksNSoTR?&cYP``|Y*%b7suF%hJp#BIi1OeEId_68k5CZdmzP z8rWb7!`Y)MKkep3v_V>SdN3>d^A0Pi#dnJOqVphP2{40DyYIiaSk^LySPal6Ad|K$# zKqrcygE$3U*u|xpkX;sc418Tj;m#1rPYcH@;fQ@B*w4^;GgwhNk=GgWgr-?efhzVo zI%K--47ggJC-~|pDWAIWYr*YJChrASs;FJx?$17~C;{gC0TU#^l-0lK&hQ_#n#2|K zI*&cLdX{L7HVsS3Wy-TR*P2TMjKXSNn1^=mqTi22Bc zdkjms=@4373!pvRlMw`N)Ca|$@5I^CLf|Yq95)}<4Ck>5#dsmkIpWe9Qa3`2vj#CQ z;-r!03_OM^{yC~G+IddP&daj(>bl`7fVk;U!vHba-lXe4;YLM z(7i5896)Xm66NPifk+4d?JqgYU?DbWOaX(r{=34{D*7S;SAU=+QAlhkKE?vR!`6c7 zJe+?V5*n_quS2J(V2CdB=68|021sSZU}EB_&Wk_3)r#xOTAF%_Zyv8Nh18>me^ZZm zzC?1;UgnB#ioDtn7ZiE6MFU~pHhNjNV+45KfM9JP>R5!WpqT}&6a6S9zu{}8x!$Z2 z7Zj1g<_|Sj#EFgdTVP<|mS>nYzH&=QNJs|zclc%IX=lk`&K&F6n>al98CCjs#`Bl0 z-ncsMK6nYU0u8hPM5C3?6E8CiSx#ePW5WSdQs$z^@Ut#%m*YL|i94mQbiL`wV-c{c zRK5<1zNh-_pa&?eOvep8_0rPaLVozoV3P2K3`2x2DX1O9p6w6AEC4Q6luZ6)^Q6S< zFA!5tccS?LX);V2W2_LH5>q%rF1Kr40;iL-M^?#l$R5J8G0-FYPMoBPtUVCgkM@{; zjx@X6J+0wEpebDI|NIRpyIboK?1BR9rQlUT?l@w|mV=zeBfZ&)!Xyug$1F@YT0o0r zUvRq2Hh7*q=N4SMLL06I^bi!Lr!Qawg}n^0xre?0Qn+W;`TTqjEx1Iwh?TdWz;>ix@%aQu+&#e$2qdg4Z5LR@&xtA(? 
zg|#J}(j2lY(qA9gZd7LevLCYEYOew0$1}gbEQ+H#-&oRK-zhBgQUIg*hRRE2OLO+~ z7#)vxerf{EH{eUfQHl@TM&X=sm?*#S)9)1(>SBT!ZIYks#mBPVe@cv6< zf;L0s>y63k=Fn87nQ$>4{g)|>!Vg@gwFCLklYm*PhD&x~q z?SK<08oRwGOzAH4XedhWqR=N9vyDyc(%u=ZNVO~;+PMZ>e3$>z=2454f8l4YZ$0q} z&^b<^E`-|+0lj^doz8n_=z0~^)H+fdJ1@1SwzliR-8B(R_DKG-CsXIW#{pooKP7T6 z^^@ButC-j|$)~AA;Qruh*kt(|SQSiG(l_|ug56j)4vDDNpWeA8R*eP!TvXOldZ}O* G_`d+XDI{M2 diff --git a/media/fig-09-complete-hierarchy.png b/media/fig-09-complete-hierarchy.png deleted file mode 100644 index 0419523bd451ee6fcde87e43eb6986e34adcddf1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 38236 zcmb5W1yCGe*EI-92$J9$++70%2$}(cyE_C6?mDZR|~d9`oVDz{EF6h$#P0caou;0MOE$O7Ac zUqrJ8tw;RlFo+x`jO*VCAcRmD+GXgez)Q+kzg#K)^Ow;GKL4*LuTm~SPY$J3Emfx* z8=Ha%_H=fNdwBTO745A$c6D`KtNukJA+gbMC@nd-a;49!uAW>jCX%N*oU00M7DYf5 z`_Ct1u>JVpP+V=bAg!pVsBHO${L`ne)8$LFB%T~vshm;Io8MgN;(nn7pNau;8d_$`oL=0eWT zC1v9NCCR@kx3e(Qbtl2LQ~f}fsMSJxSa4n0j?e)af7j1m6hWusW_{qS6L}t#bsp^Rm&91oVed35-TdkmU1s6J&$a?KXv?o9e8oSa`K*z z?zmhfH|451mXZ^D=5;U?ST~2Al8Q>*F8kl@t{&qnPBGAsQ8r~kB%n)AOtn--Q034JZCa=?T=GK;E zFv?MdabUw;T5__wMwLm#m5wI76DpC!DtItcaAleN-$zoiV%bEb{j_P(02Y{eUvKXp zrTrktvb)kS_l7jM(`}xA0IPS<;`N4~VBzAL7QA^oF-R(erz%FfBf7jdQ7|(% z2?~gA4qh*d5kA9NhR!EGW*oI#Y-{9Gi6haXO75bpV=UuGr*Y*E4$pWWKDvga<0T&+ zTBa9VmO++d*@wA>sil)i4SOOJFlD*hhtUZ`aJXK<$0sEX5K~3rJNb_Bd_-{FPY|k^4Fd4P=f0 z-8;9oJLxWx_V$(4=2IZeT4}@Y>##Sl$HCy^`kofy-enAu9U_+NDK)(!buEZ>`ZZ+) zK4<@ZvsRG=c_1oTz#-zli<~~3c=rOUrI4SQit6sY4Bx7N>v`Wp*~7O`&*120*o}UD zoO-tl6Q0dz;at&hoXV4y2T4uM)O3F!N|OG&bNK4}q{KuGi#*j+qUyh+m!P8fgoH+C z`7eI{XPQ04;8G(aqwgo0I^4UYz}iDvNe0rQPjNA*Z^ZQbKJ3jGH#b|Z6H1OrGeok0 z=;$nH1{7>t({yxfqNHNuxI!aJ*N2O21$Nu`DRk1sg}zZSM`jhlFT!KlJhvN7EkJCN zYAj~@vqi(TGwJx^5Q(~aRP2iP4eNT%^{h<(o#l^Kl;A1Sui~m zI*raM1r%rwPBOH*w8D0Ng9nNVGBl4C!E|(V_CuUAy$zfP%59{b-CdF@#tB{|G`GwYksq?| z!o!Nax8ETlDdW8@0exXjh>!1OAK|0mTC^1&y;z?=UC?#wO2EL}a{A$q*mCta&5xk) z!)cR&iDAk?$E$1Wqy46M-|%o^B9qqsj*7DK?#i3H7~CFG{cPJu;`VafJ2r^!!}ZbC 
z1CU=a|5y0H$q;HHa@%aNpELHZsij@U5BgqIS{e=PQkiA99UhCuEc&?6kKpU~^(QSn zU?x|%YwmbyrGRvfKE6DPliSRsKP8`Lr9`ZWs)$cw1G`d=!8qc%A)i}FA+M8^Q ztNuO%;!C6rh9BFV{jSkqTf~@AtFG&HWc<*(s&D;yP)v;oEf5Hhh1%ew8fQsc#reEm zc0qm$xOzAYT#^Dm$x=;C?GEOcw8RYRC}}SdW+JbmH23YI^QW7Y0WSc|EEQB4U+#{b zcj4-(pZun9!JMhDuP+B*jlSOd+J*tbe|w#S2|_>=ZaNCvy{e=Pz-F}){u@wPiDX07 z5c3}9??zz}5oM}%zVsGmI#ZgmgdW*H_5@9$YzLzaDS6qovai3r zH=wl|4wVQUFE`}8o5#>@}Jr{)i|bok{`e^gU-d z-{q+A(d1~I(|*~wwF7Vc{*RKT)P*DtDQ#_5ArlWi;p@HTsPtXp%cgc^w3)pi{}=l| zi&tAZ^rL*v{xBgmIF^6;SUEXWS{@#G?~LbJ>~ES~N>=yB^TN|71X)$2<$7FuSKBd5hnDbEcV01YFkfkwm=nJk8trfq{Y7IAoNIzg#Ug<(mSzux!@)n5+627{`Pz zvVuAejL7DYkokKwi}}tvpU#gUQl&0L(%b~_7Sp9M)=Le2)n*f$u~N)qY^sQeh}X-h z=ydBtXvR!z)0G~3_IHovBgXeq6P!;yB;LyG!W^W(X0TB-Mh%S#y+vYOg*j1)zjzMu z8DNBGOhpMks}Ft4^f;F|NOwu`GX0Ux{s15`xFKppGD^}hO%22E$*XX(bga(BWWu45 zB<74w_xmOIf13q;Y+r*J7Rk{mc1Z_W-wywId47=kt=pXIbTB=hB@BNDEN|nkFl>6) zF@F_PCGttbvOxrP1TL7hw?BBcA5L0-pZ)m2t_-sdo(bks0EyeqUQG8IjA*(3&48SC zT70Vi4Ip}KMUB1;NWQGJG`rhw7QFFV#}@kBMXmW1{oYtkFfcZl_IF!JGm@xUq~v8g z$ZY}VBUIk~O^w@q?|wJw7u>_I!y&(0@w`b-)xsb7!4JMx_TAw9^l>u-;tNdR%6`EP z6IzQ&Tz=0P5VGSvujj?uE-OzT&Bl?h6O$HfT7|uy@3=xEoCLSARyULsF>(tL zkdcmm!)0P(Vp=uB3tL40-O0w;F=@VA6=R!m6>JkT7nit*$ZV@Au)MU zTpHdFl5;LA!_&13BjHIltxuridefMuhvNh1)EP1rZS87C?OU6JXT-A7h^VMou9Zbf z!)~L(KlZ}X(($D_S^)lw@7H@CA9oj6lKK@8@ZIfh>uQ>}rphS|oVA!O8cS%~W;9ok z!)WA-kMKWIgl#cQCGXJYTJQ^n;1kv|zYT+0`6MwZyK@mUc=pwpa;MfWHk)0uzpiW{ z^)f#{naw?%{zV-)f;2KfJ|kClF8q!mTYd$r5WGSvD5%Gu1nPVO-nR!5$;lyHHzy|* zYWdXJAUlgX`K^=ND(t0XAoX%0V_zaR!#NP8M>`k(qP|%2%RN!@@L-+cScU#!qF)X| zJu$Ls#I*soQ39|ik=zc-bSCVCd~*v5BxHwsrd7+i#yBhNU!GP5awQ(-7OY!@tFN@a z9Bylz#OO{t2Xf6}+a=aLYg#v7u}r(u_HZVBvihp&$EF2-G=Ke{>XRIgJqY<5Y;uK^ zjNSX6f{yR=YT9^0TSae<2@UIA^cl4Q(D&I7ipZ*GfYdG#_`b|sCjM?@QTMfb1?es;^ZN-Fp-KHTbrX|(kMlknbJfraqqml7>QZKbPAaf|RKB^n zlX{SRZ`-mOD%qR2A)k}u*4+mw54ttNQv8qWcbfPY^t_*Hx*elIy&0pHl`+CqeWyVt zachV})P2VR!E?DO!AHPORS8C4Ll~r9u(4$E=3j7kew8e9O*Ers;`OQOt4SQLo=3%d z5&`Y*A0G@u&0Wc_P~W@?7#GFWr5s3PiknSV2S<5J4oEu19%$t+W}}F|G7up+7m9|% 
z^gxql)$!a;G2OgB_^qZSLTK}f(Bl^kGZ&YdC~^A!& zsFdOpP;;};GDgFbxXXiUPe1s!?fgm&9trYK*e}OzuEB%g9�4szfCd`Nzp+ja`? zPOkrq96O?$;kTr@&8uluT8HrIMa%Q7VEs>XGYvN&JC4^?eM10)W6U3%OfYKYFV^Jo zc&2u~83#_4sWnfaUVx0$*1r0Q^g#3V`RX^x*7na}jRWgEX&G-8G#aG>FA87*Mb{Ab z+nJ3wT+vlJF+lS|F0qhrTiYmWscy68hMKOMD3M^AeLr0TQF#Zlh>B=mR(wH#~`2}s6LZt=J$OA;mmdNLU(xyEdQTHhz!`|`5sc0Bxiq9b#&k)-7vKF(96 zcpyRKnTr6Xt?nD{-L_;dZ#QYy2eMRs$xtW4aK~f}@@Az1*^joXLVL^2jcKmyp_T4K z=6wSrad3Jp(cVipEO*zzHe2a8%X3a;f?hN$UN2>jEt@V!W`RDAUimg{J7nEyPS)kP ztroQW(N1sy1V9=y`H{01%FgfTust|dm(@j0cJ-e+sC}VL*FXgaFV(%R4$f7yghk-- zRQO2dOCVOll*2HO7+8~QAs*<>UFcBvnEf4OP7#oMd=zoLTh383r+2251JVlf9$2LXGGxoe?2f}kdr2YhXL zh2%QO*0ak^mpnW*XEf?0ZYELWV(sLe)yZ2Pe%qsl1^SbNi?scN^&kL2m<7L+P#W=z zJUnd#;ic{?ZV_0#7sRZYJSkjHXMu-9cFx~VtNy5$bMuwPQt}R3AaYnJO?CEqUtU2$ zLUqe0t0=gz(5M8Y{?N6>wc?S(yfs$kXFD!Ivqz}0Ys_v~f?(%oX9iSLQ{Xoaw_grZ zh&3p-ahNo7E3)X8rH2qi}t$dLa{ z!<#n=#BYP-mxVW4Q?berPaQZy#1@)^`-B>8! zI7wc(xte%hb~~v`n)|$}4Evt?@JJb!)j~KAZQGA^4%CH?{Z+Z?Z+jrGnKm99mqEvy z2)4KH*N*_;K6xbHGstP89~riThGM919*Wqt){Z$sl{WsXeXH@LMTw2eW|PrFbst-17F@=iJ=w@qA=qCDc09^^G$x2d-n}e2uxex&3L6 zv-K|CW<=-1Sq1PsLXq6+=Ie;J$)9@rfayAX(DcTS&XSj@`qa5Jn(42Ve9^Yra#?9mr1E$+N@LQInE z>`n_q?cuKoZ^+2Vh6Scyo`uLhed?y)`9nN!+TGKmL3U{&BaZNChI}*Rca6$bJ8uSi zwU42>{bIR%oJE-qw}cBtK@0U>%gypje;{sctn<-lEiI?B^{B+OG@0{N{{u!gm+*9) zJKNaf)6F!8;e^ssC;Ro#9tktEA)VB??k^xv(_k8pT7%=Bp~_$eM5&qS$W}BetnGYj zU@L?oQXZ3YW4ej%quDuS3ah!_CBY;rJL925m5;p2$go5#X(qAHSr)r0kR`1H$)M|M?RzDlAex%r`$uD%L=dUgomj;c%e#^m4>S? zjW_(%i7D5+m88d9qFOdyVL-_-D6+w-CdzL)Uu|aJN0qdz-jKl$p097>d%AQu?x5YWV7q)p5c#w>F>5jnPQ+bPph zvtIHhZ+7nrJax1C=>0rWX(W+7n0ywL9v1W}io+&@5e|+-OY3e~WTvy%`}v~k%F>OX zchT3yDe{VoHX2j5P1`SPQq6En@8!Pzp5Vx)EcheJ$Eo{BiZYk48A#jnJ^=fsq_=+5 z^6<=Hnvp`k)ENA6!;4mAAPD(qS<(tl7?tUFq&QMDuu)B1QW#rdJLb2eGeYOd0_Y3JtE1-K*YeK*xGpjAYE-lyfc zS$xOHwk!$0Ge`_Q@41|ZD=E%N?%?e&i~cB7osg8HHZ5z5_=fOvIq0dWzu!KEV{bm) z;pzhl_o|Rat5>>+Sg`8yI#*xscLptpNy9S=H#HBB1i@yl&CRE#`mT+lSQMi9f$br! 
zGNzjz6&00zS4r-UA|`;s-~mcIWPnzSfu%pdyYWM;mJ_%$DvO`+U&&BrSXL~vH?tq= zS^z=_zYhZ`cPynO`o&4aF$u*|^xU^oz>jtiJ9}J6-Cw${k}HK&g>y z-XpT6-Ru8Q+F&7+N8Ie**Vi|-?F81ftN6Oj-Ck-bvvIwxOxj@#u*2SRT zZh5#~u$g*&1TDZ$@CIK(>?#9pJJ_I9c?ij9m2YSjEcH#Gx^NzneBB*9d0}Cx`1$z- zC9fbjNDH;juf$#~MG@(D-Wp zP6&WSoYQP*{TufpvA*x%x~We0a#MwCd+5iRpY7}Jwa^a-fy>|5i5I0{I*^lHexwW% zV@BklzvAa|Ma>Gv^NZrp`HvR$PFYg{Jy2qv_oOLjTMDRYn~o1wT3fp*xzW_<5b1R7 zCipy~qDRTUtbbeY+!%K#5G+10vmYOIiy0|<9c|mIg{lALRd*E0Z#MUJ829bA=`vP! ziY&h~d;QK3^m!NXvxY7KR z31MU#BPxgS4h1J%Ymqs-ch)h-8$?W29IWIFo*$@iCK)`g##C8`?hkBUNX_NYs}ZXV z%E8C{Six_0B=LR^Pf9NM@>tcK_PxpO3siB)m4LM~&@5i@da%O&-mQ#y{DeqW1K;HD z=P<&rW+<(W%8G;5mV#8ogI5qz^lrY!f(D+*G5UI(2z*#UKdX=OPG17%hZzH$qY;#R zC(9i()XZ3MKVTwC%4-(J>E7Z4&mt6QjaDJ+zeQO6MAG&^?z(~e`<>e*Y06IAB%y1` z;o^r0t|Rx64-?Y>UutgINtl)kBc1f6g9MuG$s!W}fs@NWzHElw-}wOC6@f^GJW!~R z!|^GXH z3cz6tOZUm{NTL!p;MNf329ud5*n`a?+W@Scb(Y5CFi8eK$jB2^;@pn+*@p+KJugYO zxFSXD6dW^IHd4F|KaK^KfmUCv;LsY;v$YAjHost^D{_mE1Nw7Fw z@c}##tQdYt@zWtP&7Y|X9cWkuyYdT4(*b0>^6aw5g-pcSG)Xe?C*@;cY~Z=KVY)X@ z;+15_GO~bryu`L`mglm+JwX6YXGwp#>fs2I+3mUll1Iho5U2Se#zN=iJt&hD7e@h< ztjZ@zKKV&CV)0jo=QEa_(kUNBWl;aiAEOG~MqAFF2D|eVhif6FmX($+es(Edbec(b zHsAEWvlDo_R05amRXFY`@|-+gP*P88jdgc6n>-hE28g83PA|%_j`gyf;=CI6Xsajs zPJ(nn<{#7xfFg?%vY6iesggYK&GGq|7|D?(9QE4Y+`c%YjzYw78+k!=Zzb70^{sIk zC8NX6ihI7G{?1U$sp-e{E;ZBGo4>k3kEGXan)9GklJeh+r!I`e=o*>0Ft=**Kan^PJqvLE4ApQ=OUshZC1_maOzBz3;M9(0k>AOY1(9+V5cK7SM z*~u!)XS!{FsDHjc@JAxwfW>xGR8lHwej5S-)X*{n_{JAu9+q<45tXlBgS=Y1J7)B} z%PxLd%~wmz*IJbvUz{~2vpMHJw~HLt;i8l7LglxEncqod!a;>}r3+s}^O3r=0L;|6 z@E;w}HNQqRiAk#uP~4TuY#W=2?l_#zoDZxZnl+>`jnCku5S8C_y$sQl+p0(9DZP_) zN?Vi11^bk_tD6LJi8q4a6-9H^=_YcsM04X;(@bj77DO{0pNF%#Th(Kgtn5@ni5EVG z%0NJ;SH}IqC6{0)Nsmlki zLE9&ScC2xTX-#?Cj!2KlK<&_i(NSv66cXI}R`*Zq_zFO*(`^PzxOHXhGjUR>ST|mL z;yJ0jFghhYsUku;bi3%tfQ+*7JA~u1sG`HO1+V0aMG-rrjLZK@>#p)^MK5J=zdkJ5 z*gY-%3+nmNOrguY9f3Cf>pR@tnqKt=xIGXHwj7c zV94vqMg@HsgzwcbC;!+)nVKFlis}3_X2*;PNMIqvJ%o`l6F70t*#M5nJ{W-JZ#>$0<1i8I_{m{l`{`w 
za8tww1TLSq?Lb~Scp&aT9*$)FH1Ot875VAd!d#z^0v~yl)bGsE)jQ2`T;kqE=rwtN zj?0Rt+Qo1Nzt#q)#q{?@EGQupz@+&FM1+VwZ2^1P@*$B?BRe0TCex!b_5BC)gDP`L zVQ&G1U?dWexFQi_&NfAJCi>ii@n65t*FCIn5GK(m2WNq+))W)~=wZA}iwoNp?QIC# z4xt%0={{@s2R7-vGix>_34K1IZ)J zumPyN7M9e+sKH;y=KcO%I+@cVG17vq%ms>BSkRCVY@I(k?#ie4HppcP>^JQKO6^$v z@akA>0KBcp>mL_QXB#~hmfe9ZrycL`|N5e_^1H-9)zVxEw(Uu_Yka_Ru#S>^oaIsu z$iSbO-)&T_64?O+5AO~adoy2J>1Pk26FSy!D6&yLd!n8I-^>~0kM$$7XN7I?#S6d) z(bS?EMKcnBQi-Ohr0<9NyVKUoeltd_xQ>8T-^cwrjmswE^ITi!8YLgwML-6i#p3^E zXgvFhgdW^Vme1kC-6J1v3w<T|%)_|8kCK*M5~-U>qSf zqng`9Re9N!xsV14^$=F?rZCJYWP8hT%etNA;cit*vBa5Xda4c!+*s}j&hj3Y*ZyxE z2+*&8xn6+IaNPct2M^%?mItNV(twUuEHKdCb`jZ+b-cD;{5fkA4)~hKCt+lvcdoNp zT{;dTur5?Z1L}gf)KtwQ&R@9jOfRl-DoHa(Zf69mPr+>3w_Xtm49_P>caKXW#V;~m&Z>3)RMA3&@TE>raaaJf6f1jEx5uV zk18)VEtSzBe<&vD`7AcQ$QeP$9Rs?qZ((2k`oC5t#S!%L?mDkY1z1Ng-GfIvh!}t2 zA~(x~JRP~b-So9}h{J7{(Y+i5n8&Z8nB{YN#HK>a^5wYvpRCUt_*t$uT&#Bc?haD3 ziA2wL)1&$cY?{c}byLDeIax%vkZpEkw%9ttcKR@w4J0OP(kv_plN* zl_=FW&tmqeDU8B`a+P4!@d`DuKD_8Pfy<$2;)l!8*^_6Xto)v<3*s3&Td}>-R{s~i zC-z*OR%d+3{+#5*yrUTf(e<9b_r&~vjXgE5I#nb=VQPyUaw`;+)#A~0$#v;vx13oG z8Pqh;EU_4tW!t-eCG=YOi-NCsM$Ou%O--^Dq}=H=&g$NL< z1Lcp^&Q=H`y?aN+ zz`*dm7jO+ME^0^on>YY1IV3=5u8L$v6L3cKc&$XIr0m_hqu1K39Mk_^xcDYmo^Y}m z)Wy^1@TUuRr)3|9X82M+yuI{fUg-&Bjw_HA5beGLSg~UK{9f%X)>kR3*iGw#@ZX`S zb>(%%>P556!OejKD*Zmv{icOn>I`8K<9o%%#sV%mQB8<75eJ~*NQL7taa==IK13I| zU2K=1lh^fGTo>f$14f&3;s;ORpP#I>^;kea%Fx*Y3h=|8lYSTbzw+2RR0-3HTlQa; z`@b`Ihx8bXJt}bCHWf;$pm5&t$tlC2n>Hhno~GTEXenc8?v(V|aX#^(?VFk(3$1`z zpaVNea4=mK2S}6JC_I~JWuw0^#1PE*gtR%OzEhi-nZ*J|2C)#i@T9s~?y|{AtOx8<#9tNRJE*~{fu#A8C;1W z*#un+_q!oX+DqLDEky?U{gBjbswD&SqgwWF=tui$E`1&5k6XMH}}Ks>4pimR!3A^$iX=VprN%tJ9S<{&vPzb<(3EXt0d-Q-!On{Q0nf7Tx;C&7ilZY`<2LQq)Kk zg6(w!kHj!vjwVstT8es>a5i39m{(0}z4n>Z+fvlBnHX1xn8}RFY`m$ieud8$q^J@5 zBx8XJRJ`Kp=Z94zlHa~%L-iGBBp`*3v;Nw-i5l|mU07*iVsFjd11@q=y?w1=PLhlf zTq&Xz-c4+yP4Z6Q9Wv`M2SkcA3^Ux=v)_Gg)+FcPkAP=$JWZm-p^f*iKV+W%@zjat z(BFS$CMh#(jYE37r=Krw-w`N&68AE1`Cm723Yc{yjg9l9lUdtkfqMYqCyrMt^3*-F 
ze|(NVvp);PZA8M9OQqe(urD_LFzkqS*A zgEq9NT`_J$`Bbxq%o%w5vaUFlNNoX%5zG^yI&!|u&wob2Vg!1E+%kY+*1qHvVY6$i39vSq{Jzl7NrLg;^Km zdXCkN#j4z!Q~?2k9;t|AC3&K$DP(nhbZuk&B?z8((cj<@LT>zi}a{)N`>RzcFruyPO( zz~uo#B%AW<9|uU_JSgS`qz|*H;)Krb?s+gGS?9Y>6$6Q^u)sk+DXiOo*hLIz%y1-( z*WT5Cb()(?d30nmkuNhEosq%Jq**-*CB4k576Sw0JGle`A_9;TzJ-Maei~v@ zSjEK9qTHwKBW5pBL2fgx%L9fiQ;Q7?eMa~;kp+FgA2F2-1)omBDvk;IMehy93`{maP zq{#44%uvX*Fyb`Xe2F%^5LV5gC0#niIZ@Vvw;m~d#wqdoU|z~zw$f2!rpqQnC~M%J zIW|5ix1#mQ_m#ke(kkLz9rK(HrFrm+UHRBFh7FTlf?fIRbHhbTnaG8oBjF{L(B7clSAgWd z6y%JdM3@78C_iFTPu+u|vb>|WFK&-;9=|t_`mVXk_<=oQ)}dcs3pij?*bIX%_ot-X zFLw=2{%wAe1eJtwfwJbq83(}+^PYu`qlSgsD+-l=1*DGrbkoCjk?1NC$9ENHvKSkr z0l+qY-YBRaLa(Cq)nW*$tmG>f^QmN!m@@ee!iOt{8Cm_Tscac=pD~ar;8cuJ%!1xn z$GogXUa3Y(_cIW?d-XCDnSXw^iFMMMz8l(LSp{!Fh9RwHN5C!@$J{=Gtf->K7Nk3; zrbE#fhc4;ze12_$Y&x&RZ`o$;zzia=uh$_ zd{eaJ62j95*>LIK<3QX1D&?0D;Cl}}pG-I)#K3{lZ^MEw9(xOr-|326d@qj|_G^Kr znifuX<8q0-$I0e_3r9!)!>wJ+t6Hv22MCsle^Gb`Wf4)Wn~zlKR&<$f1Z;ncXNjwN zC05H?FpUq+yF8THyN^s5@3v*RYel5-DQo^$v(QqOF5JZ1Bs~pY+P`g^UInl=@4w znH2*4Cf$OmbR*##Wb@Qr9Ksm`C-}KZPo>_Tc8l%W$c0?;+Kyu9x)Q)FGC(sS81vAx zVpA}Q0ySPKzhbFdeFdy0&%^iz10dIY^lC!4adMKP8kj6{=?B)miH(?m6VtLhjQ8c3 znEifH!93j>T-Q%;+T|wsCQ)KEn~g{knup6JvHpR9qe-3I{M`N_$O_|*uDf+a=K?^c zZUJ<0%vg@t0$sZ=o?n~Y>*sE3{-YrKWkA=^7A+t+{h6gUgk0FNmcAV%KOC;x)d4yP zToli8&2bHPJo{SJ_Z3BRXCE~=i1gRE7H+$iIw$p`inaMW1oLY0;1XMk01koENZByC z*iDnZalZOHVzLH}>dx*?cLqPsH@fyq7-0xS;Z1d_9C-DxtvX-6_yVT(cp~eIAbZ`> z*?tWGlOMW;%gd8O8HX8E{>xFHPgl0EFSCYGW(RIMPeo(r!G2jfjkV=kuofU`=^uas zCp$i&B>`lwbo~0&6W`VahTl!!fB^+ab@NVca)Xhd-m@3mf&4ZxJ^zDgK7g&pOwU!n zx%m&({LyF>jdzS6FoW{g=cd_pe*m1cl~rXfq76^wg+Zl^2mIsn55z?%fUc_X)JqZ7 z>olVS?FTUK064Ndm6H7(41d?Zjf_k~?hqAW+8wy>mpOB}B(oPmFS8e4t0Vw~0?QU< zM{UhOzXtQa=L^fG5ojFnl5bLI8ncWANWeDqKETXC0Z12h2OTN@C>arA0Qrcp0_Ltz zz_s4{1)eCkS<=a@RuW3Efbao*6GR?8S7mDPPaet1xuXT_x@0k7H1j#zKggIAR5*Eu zHTe`cADdDMO_`=(tE3^}eKy}9;h3iGP6M)s*LFKC`cgXW_#|ZLo$FoUPLHqL=Y;@w zG$%E1{Se;loYH+3^ET_(L1G$qI`yGGb!Le(((^dg3DYTMI)R^3ta4RU86;@Mqd(1A 
zJ#Hgn&70ID1gyU?`$;svLne|SBGGv048`zPF4fqGtCXr`LFSc{7k10uSuD3|3#*Bg z0RcP&pDd$+vP(inW-3%;38k1OAUa<I5YM+YONX=*hC zhiQ|(l|&ZAv*YdSci773QW#Q{EL3vmO6NL(O_O|d-&IMC5sk@+f_)yh?6%xdydR!| zluztbDpX>BrXC2&`9vqh(`4Yiba2&@s-~KLO4>dEArPcWsdy!HL*cusmz-~bNzaxW zrLk~WZkg0YW{=+NwDx+{`jL$$7{IO=-EDuD0L#PVYw~=tUBC_0+1+q+DgtngDYx7= z*ffwh^%-n5!?UyFa@;ML{E?z}_>Htb!b9V*Qy4b=GXFvP+k-FeyReQNXwJTC8yrKcnXD~9H3ql)ZeYvp_JjcZWI(^%}iRMS^V^6mjJ+Z*6$aW9LDRcRtydfDklwD%#{>T5IMy1-H!UlW@g%)Aih6W=JKXr=1Qht1r)wmfF)roeDI`L)Xay( zxA@;n8jI#No62PyW*cubM4YczvGkwj1WB2ho%WNJqsN6AYWf0Bwiy_QM|tY2NRDnf zJQl+n0tT9N_t=M`SHE~m?Tfkum?&MqVrYs#=`^rToI92>}@eK@qBs03tE&QwXq zT;Eq4fN>%okjZ{&Vunk^QTOeRC;Rt^ zOUz}oYG~T0YM;+iecZX=882`l;ho}7-a+14*(=DjE_k!k=8qZ=-{L8RQAQO#@kSZ+ zqJ9FrY=uz;ij0{J~RN3X2Z zn^Uanw&5{i89iUmsIuN@8`j_GpRYgi;(xgZ+}9<&-w!(gF8;Q#usE7*DY*AcjF6>( z(7B%&w%G)aD;K*RA48IYteYzfy?~?lRa|+PVhPBk*=1=R@cXmGA4Hss@RSd}_#Ip+ zHwe-QkeJ*6tvX+N*OA*Sqc|9|un4ct#>;<$v)OQk9Cn7MmdBtSR6w;F-77rsnj`z+ z9kM=0)+_oD^~q*3gwh7;*&09FMqQBjlOhFBH53tz#x+|neb#7puStcBfB08MA|Sp2 ziWlTwmexmdod!n*mRKMH0%7(IK#WDdUA^1MoNDmt{(itFU*Gw!hz7_l4(L0=iOf31ip7-?OmfUe1wmqW1UPF>d#Cn-=1KNL>~ z)esmxk6b+I79i?*z2q_KR8L-A1-T>V8iYiKPlx_ybE`g}V*dJVLzu5ycNAuIvD|vh zBK({f_uNTSO4*T#?@=W5Jb+uHm&j$2tRT~?;XOV_uPhmI`v=oBGMlEeuQe>2bV~WX zz!?7~&t|?mA7D8!R@{S&r3H_ z!QfB-!~t^#ou17m-hO9s%~+~eWlM#fP*xCV9dv3H1cXrI?ZFhF6RYa&hq#cDrxEXG z3aGIP3tL047FJwRGB!!qT>>y-0oG?aekII;$SJ z!Q$_ZkB|Ea5YT2yNl9%1jm$BCBSB#I;>X2$M(bVvPby9x&Rsrhg=s}y73Lmd@6yYY z6+LJf4V12%!!88J3+LxL9Vj~;rW;GDOIPc+P&x3%qVz{5x9%{Wd(aPje#zWU*;16p zxu2ocgtLmZJC-;3Q{>*y&jpVcW#0X;IVjQniY{A`H2oA6TNSa&(Uf<4cNjWby1zDN zH<|hr(-asLRE|Xwh6TSrSv1y^Sswl2%pD1r2}@vkz6fkO>mYFMXSN34f8u#u|CEb>IT{-dB z1rbs48UdajN9N5~E$e2{>w}6+ptDMtK*z%@aZ#GsC5jg}_E zHM_-PNnTMg`(_A=B#j3W=Lei;4T9&uF7P*a_za8DafZ#Nnr&31r>$`?kwr(oQjr9?ejGh+0C4Nt`@MSpKbhG*VObps51Jai#OeM&{PzNU7S!} zcZ#VfCy0Qvo3k0;6la%E=F?N>ZpgEk&*kU6|M0m}cCM?;Wr?1tLG#80;kSCoNL>dv zom@qLGQx^Z6vLlLsj_yv=^YhHwcQ4-GaMoUrq-9!M!t(xc4tW@*fSg?fP-r^1^lJg 
zIA*k^eglU5hu5n5*7aLBBY;`Mz-Ef*VrK+0v!4i5=7YNLNEp}lE08`%sYK7y9aqo? zUTaPm^{*7TT0b5{5X*(u#B7}zmG@8L89byOWrUSst7g|kY^tbcH_Oc&Ir3X?m2Xt4 z8J{ew)#eSxL6}Dxt$fuosFs@uV8zA7Pt~!R!6gQDN3{!wH9;GFG?}H>kYbBZmb0sF zK!1Xx=k?(n{)R@0TxoynGFK-N*P^Sx=^SLBR4~3K=72`MYkKA7+GQlWE4(yfG=Wr5aL9!|;wJNGI9;0%{?0xdwOwM*<_0c@`wx+YBzWyu7>&@0W)aV4jMLi}~i} zrXL32>aLsk4wklG5Bq{~cHCZ`Zmx7{0iThamsd-Ajyu5M9o2OA0qAGVEMd8SqDZPB z8r1m1IRogBoDg_Nh72F^3$cg{kOdT#(hU9vg{rLW%qD+*qghRW;mwb8h3x)b|9mQX z^+ewtW_01zsKcx2QjHutor%#K5T&Am^-}Af`3v0A%&N-bHFvg%ntq@lEju{`+K3OX z7wa83BOOQCBhC?;{aBeqS)3MXt!fI;bfYSu-CncxKdBR8;toqKHCmlL#SI4Xnas~b z#DxK#P8|6b;B9M!0?g0`7Z+3O0U8h`*&ejm@g4g;t>o!%GaAf_dbiGI!4y0fy9&F^waTD<3OeHK(1WoD1R=TABxl~UdBGA z!Gy_N<V$-MlS{AxYm z;yan_WWuvO^n-l?P_v4hTMbV^_8EOIQW-C`Oa>+9SD9!p|9iI~N>7Q3Y?s*oFnmqvc?9_-r^OT#T9A<;`;q%7SwK$;;6p^28Lwee)N~ak> zw|%yjqAG!%U$t(e5qj3DG;7SLgoBZ5l+5A7AP9l$WnMxCk$&%Wo358!Ry4Vyasl$; z0>0}fE@XYkrwGordH0JC@7+9Ju;dvIeuPZZhtr#$hrc1*KL`%~5BAPFu zL%IZ{hAu@K6-7ybk&+gM?i5MMK^l=xN$C<4fk8k(Vn_iANeKzR>-PV?>;1lUtc|s? 
zHrJj$@^CQBJ=b-fznn(>N%%(LutJ*un9Mc};|AG}3-Ljgw-e1v&554--ewB$x^!9> zEp5%@$F`j}(sl)ZEdNz>Dw$%d>9~F)DPNJP-~B5cxK~paH4euQEANhUT=>1r?SD`~ z((pc`Kchv7EHA?(S@84y&+nzIDsHz)qWjM4o9%Qq?&SQaed~34{Ab&sYiM{rbKGCk zwXD)jmTszbzJ=92`AIhadDW4*0U~78ddQlahwsh{i5Cm(cA*ZqCRL6+rfsH+ZQ~tZ ztv%HmDswwcTslu|<==U^1>;6Ya_aUV^YUuan!){*xH?Nf=n*#-;TTLHBM}sn)PYMI z1U!x6&xSoYotT(MqlhGJ3pjD|>nN>{bG*@CosSlB&bcrphe_pZlURJuURU>p_K~Mz z<&Shzb>krvFjM*LOAMbI%9C!3F}!tAmroF7~2ui;BGCbKKz3{ z2ybWU1q)s|eADu4_?`<(9td4&W{Q>9Cg_D!0SqlBZj#dD<~7Yg?*7`iN>Oa68uS~XkP_=TRZMhs^Y4JsXDgn~ZHTr2Sso*N{yA1BS3k3L zT-wLQnQY=!9L$p697SFLpVp(JDaODhCSq#FULxdtO+4_?>d^X_C;(h5Z`OAZ9RB3I z%TY0zT^I*VGPbvUT>FISsP8v}>&Cu&k%_E?;$m)(pVc?#aaF(E> z#Y&~1lB01g`Wz-ZH&ta?f~$kDkOy3BEM$L$cA9e_(rtVYV-Yr6+8~X1X!g-~MNn4t zLGptWg5({~#?dyU|p2GTGa}gJE4lrS|5A54ojF3#7%> zJ6L* z_i>I&tlC)zBBbv0V3DU|co2khCBW3+!#2dw3JCg<)|B7h;HzFFPOpV{AM24D8*_r;>VNCuDKVGWzh<^Y5vv#i5Z&Kz+&VFqwac zI<5A<_R*xR@4@XNslFy90N90Q=|Hod;UJAj$=X?|6tCO^}SEX74W$mQuunO_ZCG*alAL zE)kanf>-<&OC*(l6OWXss2VV^&s@buc`duP}(bq>|?`}ol4nnMB>K6S~TtotSJX6zdMXjc8PUaw^hlvzS zg1IN}d;D1d%_ChZtlo_P0S1cry{7rf8Q_XC+>B{U!oZ8m*Ve6&U`-47-!S4-v3-4bM!YcPl>&56S|i$|KVbnSC(Ls?k#>*Ok(h<02vx_=Gp{*!kLKEl z7;zcLTWcGOQj0XBd&z>1&PZ{0-#M$AYTeW!J8$vA-Q{k&Wh6%v>6T3^m+GB_EN8fw zn*ta!_exKTv?U9v<)IN^&4qLm@!N5&703i5Nfi@~`p5c(ueMe?U&h3It#gOMXk7jR zCkK-Z;x1n&a%0jbLut2RLed4W)x&u*Pi+GgzaY-suA>hJo0Pewi(PQcJe?XUx9YTw zyo?2e+FFUfvqzI$wGmndrpi^8Tq1Ngo=iMePi#Y}J(~Lpa&GrY< zo^u>^_x-QEHa`nh3#RuDsR@(|zMv(lAbQvG{)x7b&b$2g-L7oGJpmfPplkNXevmWu zacZ<@4na%jQ2FdBht7e<4Z1kRP*WS-2I65DCU}7cqjKBAtii_s$(s?-W6;Sivn->k zYB<9y+KbQc*^4_rdlehYBQGy+AM(Kr@DzZUlnbTvmVm8ejk1zTXTOB9A|BaxKgu zyjlGk_jp87OtjNVT!xp#B!!D_Y1H;d)poOpPp&3SBPFNi+|#c21dtDZ*>|cF848C^ z_L}oQwr#7(P9enll9d^0tS71uxF)d~@<`=`g$ z9G6SZ6!8dNoX`7s09vx@q_p7CCU%iSg3fw!nRePZ_U0_#}J1+gs`R!k4 zrm#}McG4Yp&zp23&aB~4`S%k0W>g4l067sGl#5^WpdgTqZmotGoCvoMpgVRs46Dp} zu1Vf>8~|!&-2us^YaNF}Lahs_ycSU{+6e~fsgpDgug9$uDYB?4itYSYBX6eWwn56I zf;EH?S^1aPn3`p0##6Qd2bcL4_mb(M;sn2+{alRNrL_%e=D%AhscwLe!ws?A1)vH; 
z=w=@Ws*+>Re5>n_APt_qf`u>_YRmW&1n#5Nno?3P-~dOzITC zRSDkWTAc8Tp592}#-*MRr*3U>PHw87-|I`Fcy!WBC1M^`7###^FvZ2ikwB>x33b~* zhL(_MT4x>kYa-Whg?_^o+qK?ur9XzL>tKMH3566_hhHZYLEN@gWZm-JkGA0M-nxFe zI|01clp%zX_pLO@SK)EX4rGqQlK8K4e~eE0IZn=uF2AU6)tRd)Q64S zin`3EO~|8q21$F7Nsx>>f|Os_QawQcBZ`EvQ2Gy0IWdHqdQ0l6c4Nw=a4f5I1zk0` zW3GOI!{I+bw9MK}3q`29lv@ujy}g3ZzGZx#|1a72gDk4|GSJJ%z&;0)TGh#!Em|7T z_B#-cwF*+lo-5dmvVdEQU(Abmg1lRH1{0qppR5;f*XB8*)n%}SGCknjz1g~XBOVwj z@((~J>n%jW4wykLzsp{9H>hXfg!uCI>4)pSvvX5ps9V0PpRYm83E` zn1q!&VmXWypMCpG;ZYC>rDLf#Ru?;=HFN(+%e3+1hr15}AtCr)mb!UL=Br1e@&@4k zcGyCSYXZOBV}p>qz>~}lz83^0lvN+)1P3(niMwI(`#|g$GS~iTgSY0p>;A$X$$GZS zsNSIKdRwHQ1x$!gu+tfM z!%D=No{EawP4Y>iD0?0a9LFy&*yi7<=TSqa^(Ti z2;%AymW_3}9O7zmid_Bx>g~1`Gs$KINl<-KVm&={1h9po`WmN}W?I3jKgkSI!_q!@ zY&^H=M-l*qIU$+C!;@bNrh}{n$*jNnZx3pkIc4{M++pM%5bAzTu3_k2uv zqvcjKbcm_2#T~u)r^elJ1(%hi=Tf6+e0TXW#{`$3~ z%TXU4i8!(NQfIYL`B25PytIo?N{&^va&DVl)m_}92C1RUc-ku&G0C|ZHy5T68J&)Q z{*3I2a=4a^vOVa)JdtExHt=3O&uR)4L;p$tFG~UI$qrlXa>rMPSuU zK*;tROPBlW0~wHbB_9dM1b)kTO2M6hH3gQ?mRIo!X`3zfS}%2GxHQ`z9lU&nuAp;( zV{QJ^X+o!A_LkrtIxGq=#Ge-Fj{u}EQ#AJOTyqBBQ@;n-S0`QO(a{#cnw`c+@y(cp zWZg%Hb|~c^sA{84Hkk0w_qVkZ_+Kx|UGefnXJ~pIITi{!(Y0zpqgk@KX03yp2q9&{g?XT$+H-t zmnrMUBUiu3X&y9}XzN<(+7B8}rzhtW8etc(MPJ^h1!r^bKf{OQYwn1o*`9WT`0az2 zOde|%?5_#%6yv++D*#hhHGm{4EeNF@vw@4IY->E-kTVOM{q{m71G}ibJYk^N7{>s=2%Cixn6diwfJ)0*m5yk1pg z?Vgr>7SrA8Od;M4+1V_zzgkwe)|A9ko=Ve+jb#B04i6R|q8_&x{MX!ndFfMFk{e`J zV#@Q|D)X!91kSqFcyz5F&qGR$uNu8C`4gp8TQ>6kUY0n`wdyVOYs|8+Qz&f9R5s_T zo<3!4I^Tc(t-?;Utde9$q(mu{!T;ogop~nvJWvP;7B6uWRadjGQ$h|@Ait>=g&CyX z4c-?^@i~Ssrp|?nBPD>?;5gU`1C)-=Y4wKbb{QHW8D$IUnNHa|014_~z;J|axO5beu zT4n3)fUn;?#6MZiboiuE4slKZWpZ|vB7s8GUSY-6J?A&7u@He`o3g=9>ZN^Uc<)|* zyap#-8mR_w*8_9Bmg1wZW6ulOGQR%}v@<5p&)MYu*xKd#<_4TEAq{vQXJUS3!rTi_ zR{l#-GRFM#M5f!-&M&IGZ`YTzoZWYg^24vz?VZ%_CAPj9F&IM`Sxfq0+pu1^Ezpp- z)ne_X{>E*OE+i9onh~p-W&zB=Np36V-pLB$;}rY6zMFdTrz24Xa6?|*_#3#?Bl zu;o~z-TRXeWGza`Ah6pRo6xf+ho);fzG@$u?Z$9RvH8|`H|2B*7TBUuI^T0|Kv-7e 
zN26Wi7uchD9RjAUqPsVv6JhiEzu&KI@EjUn+Kt0i$Q8So-L4Kk9*kF(X0A1Sc3)_V zT($lJe|N{0SSpX?C(NL-!E`}?>h#IpAAjtCfmbDuKYgQr>I`%^oUzUF#c0lrY`=tK z#(DhU&!2k#$S=I;6M3=vT4w9Z=Mh+Fge~DMufr$4@N4EXVn20tcwnMqO{)>x*&g<{ z(V<|lx)0&fn{fpGc@kVB_w29;{J>5XB+5Z8bVi5+`FD~Aue)*&9#=h)p%b$k1qL%~ zS1F)q{12mEwcbsH^@9b6kVTSV`(xM+DvRB`kQCan1r8TehKZ}V?Oo@nycpm-)(Vl}vn4@?-YP#6d>nzG- zG5cE_$%^}#a*&PFF54$6TW@d7Y7Gv&GU5;DmkU(V@}Vp3WQCe~sR(OE{>g>_wxQfOP?$oP%~^@E6- zo)O=#;aenN^Tb>8@aW)}kI2n5yX?j$pZvVFF_Ogxh76-4FEg34;Q2Qu$*rOP9ApkR zkO#StMgCPjM2R*#5|LNauvW`dj0{2Yt@;#mQ+xkB7o=iFYR7guV(^ z(+%tA8_=omxAn*I*$v14-a+f;;;`BNiC)W>i*ogj2iQOUg%rYv{WW{B37V%Z0`&xi zyu6#}W8mU_{~~WVBQ`IqkZ@wGQDo7#i2UBlgXaWPym74ZMsNxC9aGnickWN%4q%Ok zeW^*-&LBSYD^!$H8l{G_nx?GoXL7PVesKSWT~1(nfB%5JsIuSP<&fgq{CkCk9av<0 zq%yIhqk%niDbxKf^iQ)d%1yT*aW;5CJ2+Tm7J`fZdx{A>Hznmc;C+G0D;i!r>nTP3*wdZ~~zgu|YH6HZH-`h&hU+a@@(y`P}IsD8;g&?fMrkP1K z6DNDharfQF%^8y;Wy!TXs<({n&QPnOmCmA7M$O49iimPqR9&) zrcGYn!7PXCUZ2wD^*?a3Hov=|6ZpiRG=B2#{f7%%AeWA;HsvnyPw3a!y`Hn0uX>{O zlX>FZRq@IA3tt^tznEBRp1?<~4dxR@&2v@G&HhiO^-Y|ecw#bR#)EPo9oL^fo?0_5 zF(?Ehq%d=yYJ6SV%m4Wah&f|4y~SPLaH6L!@Gr`jDhcw80*qYQQJ3=Wjm+`n2R|1- zbRGYk8ANQ(mDLo<=$wvHo6+ZUmED>xPra)X_XXGYZF%AI0JDeh5I~yVXBXv{{oDBZ zW%aA{N`2$^ANp*+qy5{Qezh8F<7H?XB?{BNpNTq~6Y;F;w=7}#{?#_x>DP)~a8lAY z9vyAdw4vggS(jh}YeIbI&3%~Yw8b*(o;B-AWuUfu+Ijz~diB!T`K@GPD-#52x?I*%S0Xd`;!mh43F}_2X8-&q z;+H*`tTUCTl#1#LyZrBW)XW2r+}rI|rJr0DJI#%%OlN*FgeWdc>j_N8Nlq>;ed$Rigp69vz^e> z(QQdx{5qwd&z1orp1O-m^#F9FHc!Vcrgi9C_rOs7MYkO$isRo9G2<&mO#U;00CAB} zei*Tpt)G7Ywq#bL?FJcZHv7$iEGb9cD$`~%rX{LQp!r~eu=#?uJ8mh{I|e{H3B{g> zs^C2KMjs!0l5QT{-Vp^(=K7T?&-)F7rwIxhlw%Dd6r%R{BEloe}&~nPSPL9 zx&VGxjaY&$?&9uL%iOWW_$*IeZg5z$)(8@a`uc@T)1ze@V|Y?WL< zMC4l__hE<|aY+~ZfB(OUnohcvBy6{yfZKsfC*9``3upXmAO=$%ylXuS2WqwU-&3Y? 
z@9<)TEpNvDyx)uap95}E$o}K6sj4g>MAGa$TOkI?J+XfJlJHj`M108t-!9udO26!2 zNT_BLc6Q1ssrE(=a*Z^&E?R)B&C0!gQf6u3Who16nAdAEQX>J`xKrA@P;j%oIaA79ID62U z>bio|-F4kOE{_rvf>z0A89VdflEnA#&r`5KF<2ml>T@%HD2|$(f~~Xmu=8}s|JG_I zgVt0;g!{}PO)YcxS7#nAEQTd)q*T#h|!7j zC~7gau6#FScrhAQsK6=hv2g1Lo$NU>eN1w1sz-*?EQ#kF5Cj zZ{%;%vw1i46(8FMitA{<(mokdH*yZ$6Te+QN;s_59 zU&uFlg~p%$4sNrctFaKiV5_ZpnEH6e|9M8y8n^FRb@aQI(kH~ex) zFN9s3X8_x1WWXo#)2|Sy15u18d@_pvk4B2Bix>aajYrmn9r5JAvI)G8zsyn1Q-&Se z4C*OqcC6F${$~0?2@TrL@3#LB|HQ#*n2y~%erUJSuy@2oc>(g4&8zwU(>+nbt4#V4 z#G*h+Z7CooWlp~=n23nzm=z3^-nwrTw0Ps*a9rX2u4hw`|El#05xn@{8DOM!)ZQB) zh3z#wK~^K^{0BYbt!JkS)O`A>uet6%Cy!zt%fk2m0ivPMpWb>ImxTfH@BFA%sMoFx7BkrJPm(~) zZkLzfcT?%gMGjQwaix+@u3*OAlZxTH77?3IDt^(eRD1v|XW^>4JQ=Gpt$byxY2hPw zcaLlYC~`=9aQ_kKPl2B=mhaHd%0G`z`Zxz*OB*m%0Lv?tdnBXaV&t7jAV>RU5@Dsp zRJzeXQ?WlFZnd9w(6)M-Cxz|1VQMKeupqbvINsecIdXS!(C*V9jyV*I9rPI#SM(vvU zFOp#Je)~6I`2ftG+FR-VEX)*+)xeljL8ew<4xoS4{O0ITNc4+=^52KftIeW-CYrsM z^ZK<_3B|a|6LKleqVzflvTg8pS=@t;93Egydz%ccPl_DIRR`zjM91;?S ztL^Oy@gkv>b&m?hW<(6Hl|FiWX>9h9sWe=3t{sgw{Nb;_W2#fhK4v1?5SC`vP^LLi zm($f_(j%wT+5KwDRLCfZ@G7WH;509KJ&cdyca(a8!P(x`#iWz1vK971oDcT1CLvxO=LwV3UQfitosuw6`PVQSOC=L_4uX3?$ zniW*#RSm@-8LEcuV;TiFXTL-cYQzih`!Uy|$`!V8ik=4kPGj-gEZ90>K-Z&#{|4^f z4*>cf8}Pe@Ovgh|iws~*TeA^U#)shE1x+M*H`6UA={y?pPl4AC-|lVm#C~tA=%@F0 zzO0>mZvJA>b;%w6hcx-44&4{b3pNhoU~~Qp((eZ8!sa)yFGy4038ic2sldhLNPL;SoORmFJANK^t??69^!8=fAiu`5EmA|9 zN#D26W;*+J`nOaKsISQ&)!GjbC-N0d&MIn1>XUw;+g5P%aHX?Ho^H=fW;#~ErBW2+ zs5YlFxu9bABHxFe%5yFggIl|(r<-GULSI==NYzaPF33=99lO84O{TYzm-U1*#Z4OHDG#iAp-#G-&_!rv8W}JYhFNg5?);tPqCNU6XFXJDJ z0}Tx6%zTUdBgt9c1^lrL)sV@33N^g>?u01MB1^RSYmA`jD9JS5Wz1ZZevX{h9=c;U z^$+fkV@NBI5U2K0@;Lv1iTYhI_|%x7Wq6_8t{=a-YO?5{&p(u692xQ`(qouk*u$~_ zu3~WT^!ktQhQeSjMZ}48e5VHxU?9WFeWK0Zep9lZj>fI8Y1JtPA1!^uJN?JsjN&>?4)= zRHT*SxkJ+nw2PHh44n%5-PnFNvD;1m9HmAdJbwwIer#02v6OJn8YE`3W0@fxRk^Q4|Fo`z59eM zj*qG?eQKTT?jtPKO)azaV*i>*bRMKxO&&uTJV#tWt#kAA45BRjUPxU8C=#r8#QRB| z^z;eqzP*YUxC3XZrnD$9ZLlwJI3uHQSf9Ob7YtOno(uFOA604!KO_KA+z6W5_+I-t 
zrW&p_j;Q9m3P@dTV*wsvQ1OuQUG`bY_#(4g{Zw%h;grZ_*WW7x*JIFZ0n!>hzBwxj z!djLO%)3RJ=Wjp;1Luc36p&!$V&7a`lVt&=-w6_2X`g{2fQp9=0bl4v*MXXoaHrh1!N&TgDKx2|90Fv5e5b)X*FJf@2xzS*MkA0fMK{_+;k zn>I9Kh5J$j)(kRsUjK2FJzEO50V190%izm{=ma=7Hv@77ccO>E(k1fN_;&&#JK4Ud z58mYu3~zQEKd6HBm1%8^=6w4=J#}9I-0L;#JskQ z-&4G_LM^|?Aro`Ycs{Si;P0R5uB{nKPa;4KnQa#mKmZHWCh>;-bPr4_;EEhG#pAQ( zk0#-V-|LF7^qf13YL(l+B12l>{q_pZrzhim+oJUHJ~C?R4wrQfJP>mQ+pK7{q|Svz z{^M%h!}elE;mAE-FktP9h#DvyL=3jde%oyo)ycT|8}b+(w4bkMdA%C+07e-fm)4)W z{$Ez>aFREG{`%hOUKD_n=HZWCD5NL7H}nccbpgG}?;|&7(M5U&a}e3T*)N^zUt#UIvF~&1)~(~578xjyI=7;_bqGyFg6=z85OWUoonH3- zxaSyTpj#h$%2(T;yUxkY(BXf20=&N;(T6_Qr(J)41|E&;h<+DfL)m1Qugjjyz;~-! z`>0;$0_D)d5?2UI9xVU#-1iE&Bpew@(f)fKIfh6U-?i7drY?0QPdJ~6cBkIHMrZUz zksc|4vX_&o+`UJYa3Zd70U3;~SRX#r8>uhU?CdoTc=IUa0D+D6VrQV#F0dCIHMlI8 zOp}(CmF1o;1}~xy$(6A4Vh+E32HK*f56e-^o~(vK#$kHX_q(L>;Ke<%fPhwn0?MoJ zQtkK>W0~NSY??@k5Ok_^ynTe3yEXPEB%Po{i^nANt6$U<#<2@$ji|k|ij0^E)>n~s z&>Avw3Z8g-a?9yzbMI|DjaYn>+hRW@T5G}&fKy3Nj|ZQe&8cJuOGLo_0OQPzC?1?KndPPuy_1}f4W`E?}x z_*|;H2$y;c&aP{?uV3cEf>F(yvas52UlxI1qtdNAd;VR2U%vg=TE_|7c&h4$AHs%p2@N1uY);@t zP5a@iV$AtGvF6?*Y(65NAW~p?O(LsOIVb-@2YaNFC?0hqf4E;fn$>GYUx@zcetRN= zxEe@>#V{Z2NH}C#_1AU9S6DJZNSn)?zQ;t#4EY=<RQ@Z9}kC ziGf&v^mVP?dpG}{2LI)|%Zf-X?xP|;752*%kHGs8#%K`dS6TmIJK{>UL)Al>N;jW<;JUsVyc;w@FdDUr||icDPOwD zBfQ5N{eORZ>|d!JH%JxeRZh8;81cm|qA>ZlkB!lel-nV(L7ts)f;EnDotND+p!J&(~HX#<Z@QB+GMI!X{t7YJ`8IBJz0UHR!RgZREr*c!$X1$iV#Xu0vO9TCt!l# zvz;OVyCn6#ar8OosJwMQ0Se#>MSBA6>ia=rybEGk4gH1HtBOmK`1r8S+S+@1LL>gY z^hiB02|lW!-y>{BddP?VY~m3^EI6PgKw`Q9SUBDUuLLG^4T2!pEkAd1k#4xQ0{QT1+wzN-TOdbc z4n)Q%a9TRK`c5`n6-2X*Bi8aj;$ijMgupRXd%Nu50W#BPt*>z8zbaXdxhe7PKD9LRkQZ8O@g;( z<=4rNHP6G02$4+Z9^OPtuV}0G_PfK`B%fZNqk@M(Kr|1;lQy|z^?b9h5>vEt2@x0o zXPcd??+CxU^Sb{VIx;8u4g)+a5qhz*-rtxWY<#?=9TEEvp3%m&>Jc2QNNOFNUaN?v zW8Qp?={NRO2cKHlCs>MCU{g!YfG*d$;q2{ibUWABMC51v92^NPaa1)AnN;^YZdI!N z$X(aAp5yES??PZyZvBNrhq+m$Qq!iqAM|l;Qw;Y`qN;fa>aGDnr?Q{)@+3|ku7tkY 
zm}yilCX7)I3<6T5r-ZqD^B4MwK4NM8iis9B$JW>rMT(9gg{tC|;tQswOT3b^7xFdug0}l&4_U#izhKcu!&g*CU5SZf89@I8@aWPj z9G2ynNc(UIP}O^2`6m*O6IsOI{&JuI(KP)eoH>Q&DYZ!Z#c&IrWi@%c;ixC}Fwgx0 zM8Xe`Ff8hLIl#w9zKQAMZfH{FqP|^tFwLGz< zb&7n)TZ!e&IgFJ$riumZKN}v)IH+=u_K>cCQOSmf)*(_XybSx-Vtb5BG|E2gnFkF% z(FF+fPBb;=!~OE@4w+TTv9LsMYOM7joYCz;7{>%Prr2ZdYWn!cn$fi$`pjW<&KOm1 zeFjbNE=px~$`KCe)#wssMS_hEdnN8e7tCto` z)BRvqS;ZIDCt={j*3FFb9|eY3CA-YHfhVc)48e>f!5g=bo_1_;S%_9MeEXshE-5u8 zA_AdcWYi~>?(wQlf{$S$pgimd+vMHyYZ6fpi6TM%7mT*bhXT9NsE?b_ zqLBoaSd*EKNh`C;nUFM~pRdi}*OIe7SxmLrFGGsmai8(F92S$aE)SJ{=)OW$;J%9; z6JiXBBaZJL5x7Y@D360eh}|>36Lg&4GmJBOdx?^KNzuk{Zw$G1%p2p4so;khU@*)V z-Lqxe^;6__B104Au=rQ8DAbELv42)7+vR#LK0l-z-eA~gUcRy)xB7gKK73hg4U>=9 ze$9x(V2S$^QUcy7l)9=JEx@p@g{zTWP&eLup9&@8M?ta5fQ%~eZf_kD(EX;{ zS1Vc^ij`*v)b8^jGq|xQfN`26mKD7UqK+oNM0Dr_;p?#KJ5o|bdxAq;YLcvq(b`UkH0_6B+l4DJ+kGE6DF}grt^X>@e*#nn4M~MjLQGa*#oht`Vrczq{sWYNGz6evQ2^w|cy3RP zpw)qXp_u>@Do`;qS9CB$+$K6WH~>C=u8my(>W4-~vt@0XpaBl>BjreWup;xw%6>=J z^Z7*50`B-6q|o8#egSD#Ish4bS6cc>|2Lo}3Ta`-!>jgrSi@XYaV%_*>HR1TZr13QN3#Wr%Q&|K3Zq3c$88Gc5q9MCNbo3}nRmE~X22jHsZwqqn)OoUu(`*9*Fw;+y#oX@;f|0k~5#2oQX-}a~_a4 zQhU;U{uYdYw`_+Q0)xn*SCEt20#NaT?=b}M6G6cCO<6R@Q1flU7)z2sRyZF6 z>IASb+sJ0%V>a}kUTOf3*;4x^7`)#%x5MYeuW94o^KS@DJ^z=K+3_A&)M#Dp%ea8F z3RGvkrWG>!1Yi?h&`9Ye$Qki+2Vi}NpzKx)2KPTV>%0iT#9jl| z&bm_#X_h0=VYEY z_=R{*-{0SXq~6=?fyb{$7YM?%A|Q!GNa+?}(xEM+1SjM!)Rba~>VOC~U^obj_yleQ zj@7}$MPG|GUtp54*&Hi)4)x*f{fhU4LCbcjlryh*Ed~ z#@V0qFOaZH(7|I^7bDAaP#tH_@gJ3pZ&7hE%WKo3PiN1 ze-QLJpOBD_@D#z!Ez42>x0B#pXyFZV`*GEOKIf8yxqXgAe-FB9}A#8;G;?FE1shLYdU@$=;p|M zlK^Q`=6jQ6DI37mH-!k!N5>thso?bnbb&_FPeGCHQdx&NElfA`75AcBb zg9Hx+@Dn8R-Ml#p?eQoOsaNb-!dZXo-E#sHP8D8<@a1unB(aJg)DLJClFaL?_I zDuaf%T~AibLQ*q8gO*n=gk()V&S}FXUjQM_4z83Bx&iP zAurmc^W65zY)J?`CK0_0y67BHR-T>>M%;uBuUI_CROR#PL7Iqvv2LdI-hp*)4zo*( z>X(NKHH!g9@#%j@H-_kUpjy)1Dn)U{XHvATz>9JU45Opy!}V}`ET6>!dL;@8+hGE~ zT@1x%XQOyW7PEEmXcstZj*NZ)>H|R98@(`QP9brOU!Y1fkCNrmI(RUZ1#aFnYI`z+ zY1);d0MKjes{%;_5N5m`Lm-|+9S(C;d;YxqiQppoM$vUh4s2i)0*r39@2f#6w@*YL 
zfL`qj&tNUB7$+e8nr6n@#uYk0KL<_4ddzmd5w}QIO-TYTPH1)R{q#u&l=h}XoVfK; z>vr|Tdor7BoUWz*$)15woi$)sw|s@gxdDta=^5vN3BwQLne{Xy9$2wrhJU31 zWccnM<8#eRXf^8x0)=@!DFS3m7qO#d}0C-2#9m0)gu55ytt3)YSZJ z!~Zi?6;rrU;GY@C!aaQoFw!hyVv?B;?;V3Lc*PU+dv>3U#M8MEWI$ricLOAa`o_+_sSonT(X!@B_8{icP1E+K%f6HuH(-%5Ui z`1EJto!EvIJOh`O#5&Ub+#t}b;mMkBb&Fzu(hCf#Uw~$(f>r^>fN@3=avF|lLz+zk zrMcqXK=WbPyu!5DzxP#qWh`T8NLxe()Rj!jf-%IuCOD27@8zR_n?sT?$#c+iDVdX0 ztQio{?)Rb{a4pBaDza-hD6t_=1&LA=*2d9JB3Rx75??IF2;>ERY0A#d4j~oQzb7Mx zlHY3DBv6gpa|0rIZGcv$cU{8(gCVIQ%S13a!if>+PNaO~g6%_(2~+BEg@;kPg2vQ$ zF3R9F`Z>ETS>LwHu?%FW>ruHaqf9s~XaME`^Fb9I7Puls>lH{@-SbKq&5`&2hd!BO zzP+1x0yVZzf7Nqr4Gk675$|@Q#+JFVdZZyGIxvX|7sU3^C!xcg>RYW7TxfM6Y+M+P zKOi4!v3XS$X_*hj2i0jNWQ(I{B_{I0X& zW^U}wR?SoFisZ_5Ic|lXMUKF%RMZ2{LW9=iJ@#?2(`%-~@{803Fb} zhlT5?-0nSR4+XDAOYd4fRWevz}arifu$Xq`k_A}`5GTZ+l_Q{k{`@i%IrBJDd9TP#nB1a73O;# z4CyM_2-@NMWC!>|itwgg$*kMHuMn4P3>S8td!Szoq0FgAs1WXh4-2IxKWfLQq^F^o zF>N~ozD2Ju8$d>MRdfM{f9Db(=utZk#DY~~C78g(|Ao?(WojGBxX$JGT-k?9_9((26YK&U4I)bk+qwG9=CV3>YJjWUxA4?^^cmZY| zM;VdzG;!%`eapXztJ#)wg7hzJ>%IfjGJ*)6>R!f965C=CAKuMkDt~QW*FcGAujm5l?rm$Ou!Il^M5Zz03(GrPW|8`a%u$)x!8_#Ro&j`EI&c2 zEVYBQ6kEa;zRHZ;C!&6V>z3;;8M;}fmj>qPPO}SubVKfL9H#VNhG6VCE!xpUR8)s@ zHT|kBhAy92!T^THvkeK?M`F8D-yI&rh=<_~`(hg!gm~&MLp zo#NXO5AlD&?LOE%f*pV8c_ND8@fk=tUOp1PPi>=fwmI)?3FPR9VE6rm6o)rliXSO3 zy16LSV5FS)k}&+BTMx!&Ft8h#>*1zeKSE9u7ORVy_g`wC>BbrMT=W{>w6DC`uzVS`9-!fM>&&qzrhu5%^vL|pke;+XP|W}qgCay$Oa+uj)|YKtn9V} zV3w{<-6OUtJyD;D#0f6nI97j`&9!%Rv*2>!#TGQeHs{e$)PQ2V zIP7c(qJ0N(mLpyF6{i-x@jK&{F=(|m4mvlmPzpzJ2{Iv93WAG|%@JuG(Me1BE6W$Z z?r;$eFuZjNgY~U7l$ynZJL!K-eZ8wxL&xEWAmD=K=dT+c0IQ07k#3*^cCBXWNy&f6 zSrB-%C*0YYUCTsbant#{i;G(K83zPr1{>QU}FGgLfTcGGi-S)d>v-eJ)u#9zHph?%E{)50OI)C}QL-RK)G_cjgA6IKU zP|b3F!RCWkIPK(18TMG$G%72ZbUfPJ(=4RE!uvt(BmO7lX7heA^M-@Z9_0ShyH(n* zW`~v3M_c~7=?-6cu7oOlsd;@~g1x6rss8@vYa6w~jxgpt{kS<{PrqUOksn4q!G_C% zr~SCFQSU&HsqyEHwqLIV@5()3F%#Gkx&>UwvPSw!F>n;?#jem7uZ}D}pk-3p$0?I- zXKrX1c%1#`;gb7S2hEPnmrU5zbe_S$`*7V2PUe#Ic;LxQ*1iunobPdGFxYGO_$kA) 
z&j!mZByP9;nIX+Q<6Y^DH4G))zx^sQ9PC(x_C0w2BKtU>-tIS>=9)d~xBWI_R*dDr z`=Xy`9gsQoJ-^2=7I*I{pIGq_* zj5CrVAGtyhZ*X8nPKW?yv=YVrBV_Irs z%)CXREXOSzU$4F#EwQU6@fFjBPmOvhTf!cFz97{+E%@@Lrfa;9wL}+OKXQ|+NsRmQ zuI`d8_ZGQ|K6mQowwWNpoi1bxoDX9+a6eSs_gj?lyl6}Y@H}14_$#8_ zd;Pu%-MDAXIwAUly2C-q&X!+uKiVdqOFonCQNaXkZfb8xZQuDAbc(~}pc(s3k6k&S za(adyp8c@XH+fx^k&OR z^(E^Z7c;xa%(J}D<_MhGc*uTKdZoF+Ge*%Ao=++*Z}?i2WR^NSfLtdAy8T^x#f(EN zS7%%mblS6EyCC!2#31ki??N1q-73KGnofon8$e@`r~{BdVeke-kOXCyXOUs@A-ReF Z*@I`SYdw>?`v(IMc)I$ztaD0e0sx;|uU7y7 diff --git a/media/images/cutlass-performance-plot.png b/media/images/cutlass-performance-plot.png new file mode 100644 index 0000000000000000000000000000000000000000..f61c2e50bc6931c6c33b4cbce9a2cafd376b2b9f GIT binary patch literal 114552 zcmeFa2~?BU_BV`*MT?5oDpnc1+Cp0d6a-|9sFhL$96^QzEXo{434}m`B2p(%sZ3$2 zD1*$95FmsgRuLf#0to~NB9O=sKw^MEhW9){ZT(-rdvEW%zVBP_dYiRe5P6bkpM8FN z@82Fya`l+C#oEueel8;;v)1y*LnmZp*3e~SR=TWO4*sWNcXj~yu`K9>#Q~Y37Nt?} zMAqHh%3MaKG)`W4b|rYe+W$xUAQ_o|7(oA)>7L05laY};usmdbDikpxGH>09#HhR; zQFmKBpn)W;Z`=RtF5P_bYR~-_GouL}9vt|J#+Hql7rQU6eI4&&&;1IsVa=^Kp&M@^ z?OXQ6|Jr!{5dVj=^d0u$3wb=Vbxzl(G&f%l?honj`f}{c3<*u503=v?sO-4(+56w? 
zq7Jpa|6L~Z)~c-^o+x}_zT*9pm*zV>-v2IR@}un9_fI0_u4XQK|73dmhQ059mr?op z|CXE7Y4b#ja|2r8i?f9Ysbc4+mz0XVm_JxvuI*ggiLDza^1uBI!j8Y9R7;U_6{#?M zxL8<)T8WtICHWC&*VST04%(0gOD;hhvHKeI(x{OP!Gm~HFZ+w-?rywI4Si|F&Q#l8 zSou-?xzUcCt$o`LwY@YY;+CVnG+%r(WRdpv(cIL)_{x|40-e<|GB=JwyR|Ar<3!LX zEvF&_hAV>O7P%ccHbg#__uBGEa$lG~|3*^`g10d=`}TR>pb>NUjid6oCJZagH7GbX zg>Hg3gm>E}_E6Y%Pc(~H{m>swqqcro6V+7IJC;;2e=v1|frrf)%$aNPE4~l;(%f6Z ztbKKOZ9JLIcXf6NHwW17)`J$koeqzC-m*D;4e^FGlHyE}+^P(9G9kxt~EiG?lQ12MxlN z5?9CRVYvso{bac_S`ONE*(&pMVS$Vj59d-l=~mnk`R`?9>QtmKO-^qN><{~#l}_oR zFLo{3c#WGMmF3bqDh+U6lL-xn-2{&~8nJ?s{Hr%Zo;md6VaCke(Y6*f1J~7FMVwMl zCvG}nviY>(!-F;Cq*Y8~;p0b1tg~~6_P$WZoYI@&}PxiuA;9nWWA z>9YlebNO=_o%E=Nnf*;#=ADlL>zq9TLEuN;h=X{7W^uxULsA^$i+GnFK=48^YL(je9GAIl5kS^`GOw;h?nq3P=mWNZ@v@swEk4OK| z5YoR;004nVPK&C z@|?3GHT-O~uF2C%3Le75U^U_8)_zS-xDd2y{#Xam-Q$@oZ-%2cqkrn z=h{2G18I@1pK)H{Ty98uC!O0FppgB>C+_v6XxmxtvOzVQMCX8JGGXIBJmR4wcsD$- zLFD`!yw%TaOL}LKI^JY|p=(+vy;loYf3EuJ7%|O57cAR! zEDdOLCl}2M_}<-Hn6CEu)oV zROyJ*4Mur|*O{I*zP}<4xm*h6s*u$lzMwxDPMd1}LuqpK;6*nvn{kCXb-%**KK%Oz ztlSZ<=Fj7+40G$cGb9v5k?FG1d)&CRP|6|-TVAQ#+})FwV}XyZY~Vg-t)z{21gq1b$&8Q1F)$k4ri zmsw6?hEc;LmPmLZ3w6SA>iCpNUZLas0?*kg1$HyvN?l{)>UjMLyvbASlRcypoC~|t z+>!mKFzYdl976Q0u&wJN(4OO@=}pIFx2x_d%yZ)MIR_cmqpxcYl80G@lKsr@9gq2i zHn}RN(FV+@Zz`G!s`Qxr8NEbRiVyZ@dAbrR{hr67jl^EtiMQWp#P(8-eR6R3^mFQw zZ3%Y`gtJ9@S#$RunA-D3Tb*5-7_F!IEy*a_QTNl@sRWz6!g0ceqC!Ru=Sf9lB@Kyn zj;P~#Q{lHXroQk)z}%7DjDm60bTrZb)FQx>CgHnu1#6^{J(`+ASZ2S)2&lyT7l??g z)+%3D9J=mp^TEEVe^N)1yDc;7>8yWPgih9o3o9I3^;9Fa>aI~vlw#T$%-1GyYbm3H z=$Z#h+wA6lmO^qHc*`#52wW$4vP538@#HZ{DYGzkY@~}WYo;>(dSa|MhgWPxBFg)X z9PMIVpG?w18dF zsHcz2V@w91Qw>^ctQuzO^!e%W%!Trh+U|<2q;jS!vGCG`Abf#H&>vbBc#q*MP=NGc zFcl+p5%uAhTP3HIO@&XgX%5D_K%9$|;$D`!fic`Dmug3J_xps8&!DG&PI3?BZs0G- z1#@qY4GVIpz*pFgU0#QHyRV#xvzuzInR{r|W{HeA;&`K@>$J9T;$qUXbArh|suzc3 zi8%}!;h^JIZ>Pg@D&yy8*h;2t?R_J?y+4$>FnOW3nlSU4ep=V~8LZC5V$YnAs`EY1 z&j;FlHu9QIT)3to(LzQS8P1I8R1u5Ri#m8gf2Z)&&<|DcnzA(y*zMZSsHPt zbHvc1vybD=-r?Z6F0v1Cw)c=thn@B=2iyp(q{z|RFZ~4M#53qc$#hrp8kW$GU9fik 
zT(3N^4VRE5O=#{e6_2*VKW!j3-f^$AGOQ|=IsKy8Oq7eU7|+Ob=3jI5ebB;w`GZ}c zqqcl2Uc1op>%JvSoWNu_MXbjLCunEjoXr-URb~ajX51bXcx&sqtsLGw;kv}CFe{&O zbZjKK38mI_Qh3I~oy+BZo~#gx8GRkNDD<(je$_W}%$em?l-^kXEm73Fp13Dj;aLx! zM6UQgj(NV#EF_R$q9Mm+L3-Nku+xqh;J7bYQ4auSFsyreE7_#8EMF67DRF0o+hH|s zS&h=Lb8snA$JfpVdqTU_DP3}I@bjvsE8bMidrem>$7ti6g0+iyJK#te@}w>-h}oXV zb6Tx6L{xC7Hk@*qNnHgf=fMq-OZkG36XlHH?Udy%JN*flnL}&fj1947Q-ei&;Su|d z?FFIyhjvOq8-rEk>|)-ak(9o!K#Z+O_!S-UC-?4hc2l>j%QJdMP7X)APxI7!cyVW7 zUvhs>s_^`nP__C_} z6__6O40lo|x77c1vjSF6y?uc>2E2=LOn%nh5XP#S+(&%*^4adUTk$6T?)w~gPaAv} ziF207G{Gvx-p4~IL*phzFYaC*J6e>DWQgCg!UQfOyNXro^nY&RI=9TH@bcgwN_DW^ z%|ug7BVBkkf9BDQGNy>~#%JUu(9Pxd8v5DMl;Ok7>3qi_z4#UJ%&@pH4loyM%#E<% z&$U#Eba@TJqYU!GcGOKkSwFNdUw;uoD+>G5|JswcZtbl$V;KWnBXF#{*} ziXPRde?=oBPDRsX7~dD1TMm?C$?uWt811545sRk64lOjhGV{m21t_gRJ+LZ5_!KrU zt8{r>eMbwwBFbROt}yX#MzG;YlI=*mGQ3tqJ9C&jS02W_ve~23yH2sd z#0Ov2m_oECr}sH{bkuaJsPpR&N|loKw9tEM=6lnJiN$Dm6f%Ldu%lG`B4S>aUh`%~ zhphLVJGWU|`=vr)sj0p%ci1kyaba<6Yj2UrlDw_h$4ewrVeU9Z@1HtB8N6R%mIDta zWlxkIdF6pVH)-@G@WDmWT!S1USTps50!Ag28Fm`VbpA{t0b5^hD=&MLbt8aaW3NKs+p1Jp_1s6CA?P5>&w6W9 zW~N^F>WphT!?cmpMbTBP1p!#OZEe8F+QEWZ;xS&$g-M-wthyZ7}YnY$%$jjt| z%ET3{YR=wqx#`m22*xxW1B^Rp?2Meeq?*`H9v^sQN#3nx3Znu)qM89w=@QHikGY2s zUsf-`VFlrld8UT7V=VcXn;OE;M6e#g6?_@qb~Bw=N=be3wM=+KCB{3P88vJ$WaZAk z5_MNDqW~suU*PL_?#U*npCu_&^Y@FSt1I|*HbKvdh|sqzI%=yjTlAf=V-H(*ywo$J zl6x1INbmf~4`Bk{fJ)71c-ly2RVrX{VT$i4{)1n3PME1!- z#m$APbq#dBz4H0v+%SOfl3McU)}l1)vFI+dPAR!{J^-ap-IS)7!`fkW9e*T`e}?yg z>vEc~Yrc=?yU!8buO+I4elNV{YL;m>;S%4#OH4;kh=jV#?AV!GM!!c3R-zeODm1Dp z#0929xb5;?MTC*J2k`}b!??!hjbUY3v$J{{3q>3DdBx4DZkf#o%e=TTy&&9<5d$xYqI!xREKaX3z`1e*%KTxTJ5hqx-y5pBRkn~&5w)*ss}EBo zFN^lnoFAjC3)y}1D5bw#CuW~~2nS5+m6T5b z6z`iZjXCV+6_@QbE*cX7MotXt1&9O*B0&Gcn-Y8+ow@UcyUG(y7zZr^ULm`=_S3*vJY3N<3I zVy3HXp|Ws{%rtB5Q&127HHua&BopNQZt4X&kX3wGFI~%p+5E)G$Lb~Gn;kg}YdLug z9mJj~6F&1%F$G#PlmGIExh?}Fc{VPaHSRCh@vKw+I$hx<{3a>h5Wlt9v?wnPz&ESD z7Rvm>!=c=>ZW8YHv#nV$+3u6675_MQPcJ?SXXqSfT5C!+dD-(gP61?ncgI`-W4cua 
zZwx6+ob3!hp>6UoHV{ODwemqB#rbw@>00mDhq4C3&@Y*I)(Jne%7qC@W2{Q(;=+b~ zju}H$P0t^j`Pawm$@y7evy1JV1tDnNhcC47StGN~{U0rtd(Kz1k;h&J>wJQmswBqa z+9UW@TtbHZqWj6I;L51conw&0o~8vyUZ86U%t5T!>>09A3PrPDd6MAO0qN;`K#RvFgQg z{VzkF==o|6A0e$siaYEHpGF_Xu0;D181+ABA@?z)9QGAwO7$A>xB!o(3xs_j@fvVHZ zpG8g>R>A-J^P*jJ(NXo~)oj%DM~7{YaT(FJXfhSbB^z@=kl7{%k+H@oXY z@XR4#jpr_F=Rz*{Cgg%|WtuYV1*xNmPiMa|1ovPC%*5)+(}@ZKTeE2R^fg!33041+ z!SdEBgPp77ndM^~hJ*5J@V;mBeSR-I>&XzVu&ZC-h^98)1%yP7Rg0BvncR9U!Q{*o z{jBP~!N!yz-z?J1Gii?71V4V4IXX;VT~qyt?1tuf`^+sznL zeX1RW^C##0W1ET`@Yq(S!imB>sM6Loh6D^kh2H|QsayJL3YeE)O?ov?6fILijVynBs@h4JBCgI2yOjYNLCS*xak z1OKp<4YA=}JiDeVlM9?O_yIsw-HUzwMNT!Zz)ptm>05lNZ+z02UvEzK?4)Nzr*BN*LZ`_K<&% zZY!%LJEUt-A={NMpWg+QRiwoqK9*}Z89$ckI6o8gbwGU15Xk3FgEcBRsdy9kfp*$R zC0nyW#4$Ng?y>PW;=@Z0oA^r72R0LL@l6qn_$T`$sq~V1VB}z14F#!%D%Ak;wYuQV z7)VIw#6k9pX->+6b_SJ!3CPttp^(q#^CKpg|cY%LI# z>z9*?8r?)PW<0%0${f9_s|jok7MYN7O0K_tZ+mcClrtFxV5_e#^+}cCUkll~&*TU( zu^Jg%y;j&_0mLJgYDgkx6hYloACRV;61>SEpiIRN750T5mo0=}4vF{)bInBgAtLmY z6ty|A!6kw%X0tbNzJMj6+%cJn5JwmF#4DRl&+>Qr)3un60XLiQFJ``}o+(jCojyl< z)N5wLIy3gIoOU&)pwstDKj4&3Ul<=AABm!Eb24nwu70W(2X~9d3{?8NdTuOjbKnL{ zduvy$?RCzB%BNEVE)(X}Vel@>TDzV$R<5Zk;$(j|j+7&{6e7$(9g*ym(|X46jyJ(D ztp86SUe&JROL=wC1Cz;TySC&rALq!eI|0>kmI{wh!Fn=mi=pSqifI=yf{PwglU9uL z$5wzKv3$a(fz|cgMeV1j;MR2 z)_T9#OCicmD}kdfI-xucjK)1B4TOXh`96=Nfp@o-$y5fx_$geKG!rF7_<-JW3-3S^ zg+oT5UNr;akF81#!mP^R`#p0n0KB>D%CqxOdCtm3iTagp3CC*niDSu~p*1CY1W+p7fIvW6OrjL0_t7iOSa73q8VPoMO9Rj&EGWM0Uz8tP3 zt#HSNrR$#5_Q{FG1TtKkEHPi2<|$4}OLa?IY;4Mtohx94&vct>Ap?DB?^c=;{@9hd zlXF+W!+FoD-~!;%Qb&Iu^Ox&2dxdO`Q{8eI71s3K#n}K2_p#h3D}B!&`tw?m8G5cn zc1gUPb!gelOQe0F$^2=q2?^u@Gy6|HND~gvz_pS7v|ZH6rRbxAapgwnAd^{VBmukA z1Ocu22qVQ1bf$>Bb`WyEN>cF= zE?P2N8iWT1?w6;quX1iVzFE6HngnZWCmaMkUE2g$H(={Q=!e@{?8vPq)51#A8RuFB zr`a-{H*IF;2Y;4^C@f=@5>aE;i*S!y&c?p57Tf+jbP;fALyo2K{Cz~_z!4CJk1`z7 z9zA;AkxQ}BY7jhFTa-M)fV;+{mdj)GiKjS(^B4H@s4{BNe zz*GfafcwSR5az#t+WA9(U^`NDFmLqem?_9r)5iW#P26nX+d=i9#8&6tj>@4M1>ySP 
zokbU$plqt|Sj5vulsJ-hLy)L^*g(CMOVt^f&Nk`;Zot9OcP@7LQr;V**N+M@PcG?O z`Na;uxt5l%=u56SshP@=Izo(v%Zv0rXLwX1oFQ*O(9P&`U`l z3KQ2M@T?!9ZzP5ZDrR#%2TOm(u)|Ij_oDDQkW#KLeR^d(*!-IMj>(B2-LwpB2wdyEydtbF}uNP(fPI@0x0zt`yW4Y~d zsJ%5;{-dZluENkdZamvDYKfacg(x< zAJ)ug%@(e-Zy1OD%1j~wqA=2K`+d*WpE*TlJE|9Mg$mBT7;(Lym9G$AG+8sfe!&VT zN2)DRY}|mBs)mZfV$y8og{?(lgD=NkcX`9e)toN%Jir?$MTp?>{%UM?P`;^kLdNal zs`+BAT=(=*{ovn0voN};J&7PcF;PYX1uNr;6$jhRylI<=wTWR|cRq8i4eD)Ouj-bI zqh$pY1-kY{OA0FRtM+~5;BKi0N{uUGA(_A%Y1yMt@D}yskM{7wQ@0q(-RmpH2st^y46EnKJZOA5qMptiKU=`;+ zJea#N=OB%=%L!&@!`C8NFBZ&_2i_30dtoZGdyp3i-a}&^ozhj-?}NaVCFB*qcDT$C*>wt&5zskGoOB(_Xx!9;Liy72buCEtw zpk2Y7QgK{ce_Z)3)E6^_)gDoPbzs}DfoiU^m*I`OLxSnx-g|2jrT!SUd-?c~6S2Xl zBe_}oj$=%O&+|!8`B)oUB zuj@e^7sYDi^J~dcI*E}|tluAUOhO(S6i;`%GRWx@h|>@&1QCce1)9C4QQ@^&sJhw9 zwG}K+L4j9}vFX65z4Dvm9lvy4#OkM=1w{t-9(Qha3%k{7yb|i-axcF9NxrqPcbRJC z_sqS8sxRaw4f!oUyhp<3l^!8itN7?xSCya59)i5D``1DkCkqq*ZPVyAeQgcX@14OqGGn)k#6?bp^37c*3Z3S3kfh9o*;N|ixr%7WKppU0|7^BsRMmoH{B0O>EOA8F@))}(%M|3 z7Ou|aQDH-T(-=*o=2g4+E!BJ6R0ZQ;VrX_KTr8V{GE201Jr!oO$X+4N0`+am-MAP( z`+XUs516CrM|{OaD~WU2?1QC^BNqS_F=`@RN|;TOs6GFQTYG`g@tV)zE<84avId-w ztk+bO!HTuK-p&Vt7fAy+kUNxa^o=LZz%~5VHQ9~Yb15y)U5nbQb3qRbw;`dS5ar-r z^$6pR?)CQZ6+9z$KnBoG2%>P$(Y`{Qo^p17e4RVN3NZFQ68s46iYZ1iu#Og1iwu;w zUEZ`pgKNb*UGieaQ+pqR#PiPN&nJP^4Ajz{P2y?Pqfp6Xue9W`UQ4wsJ%S1|5A+<| z(7D3q<(XrT!v>SxZwCJ&k2j(A-FCD|bX7^w>XSo}to=-LLLMjMIp4?5>X?z(bEUFJ zU);rYY5PnEGtMacx+#)gIb%z^VrT`bq&R=)eiYYP1xe7>pUS&3e`$}LjLcJM!2x(G z@c4V?Xh{z;|1V5W)!k{$fJso(BTb1++5M@iFs#zlY`ot;$Y2+s9U{WN$7AHZV zWv|LcUf?vpc?i&!{?ms!Q<_L7E7aWi1Vi3yD_O>dTt zFTJc5sGeV3;Cmw}yD6uPl?x=?RGSR5E(~_DP+z$j7sf4p-D_xQ;0a@)#t(8FEJ!BP ziI$A;h10xPHN0>K+?c(By>quJAv*tj2qH`mD;w+v!vuY%T6+h_U0(C{+TGwT0pkf9 zK{&xCO;&{oG&zPd2c`0bEr}V-+x{$073MV_jm{qSIKTJ)tXZxVD8jy}(|*%wtiViW z(4&|a2FtR}j|Y7XYRXK4HHCA|2w;4*o$j8HWc5?*sEWqXi1pf(t+?MdQ>sU7sr(sp zq=P#)tutG`*fBD3Y`uDM<%RJ!5^~ES$MW2PnpJiz0W|K$UQ*-u_fgYyzrjvxm}7ir z9$dkzqtWEYfGIy^n&ejBNHRlSjyPIRS)G)n?ughu^?Kssu2bbYmkBOrdOUOT-JxU) 
z$8?w^gjb#aCeh=!Y6G|_H;kL9U#Ls9_UedcXx6cR&={W>dsAFJS6#g~4W;ZI4R}A2 z(Oi6M?GQ9=xL<#1FRr0Oe>N;WW-RitVrHXEbFI`htB*S}lGXD;n-8%s*GiGSI>`~X zyt8HcHBHHq8*&V9n`DvMg4nt?+5L$vwoRx>(n``>4BuZ&-a0Ta5{%K)?mpm>$kYdlGch`P$hy%GZ~lLP)wEae_~=( zu4J2=rd5-in=4x9)p3;K@6VX}q6@R`y*%`@kg&kQf2ynko9!&^rC!t+?ck3giCdAu~pSd{zr(X|2He9=1gqiBP8CE|cN zG9_6~EurI!Al|^?>M%B+X^yNeFR65+NvXI<0q#Z z&=knUtC-h|o`%@w1eWM{1UGxa5i1&^ls*z`^m&Sn=zax4*KaoYhduBFKZGlP58%5g zp0Zv!R#QMMnf<$^yO&AUz#UsuCPp+T^fQ}s=%VBuGgHWR|NU`3O z*kFzfYZ9D`72H7+&PQw>L+~!NTv_(3;B1CvZw*xuesjt*{T{xPZqaod=-r6-V-aJ5YN&CJE_Kw z5slm~z;vNieS-CB@pl=Vu;5H!el6hvT~fPpFGkpd(muxTw_@G3Mp^TVfH?ZNEHcn8P8)8|O(`xU(H7xB-1^(Nqls6!TFh~> z3V8MHL`XmIBvlU0|1g)7l$vh}uo2zx#qM~6)(V7it`&*4b350A$bVU+>*V=(mBaO+ zT}gBMU>6Pgw_X%{XSLYu&UqN!#C}Z^q~(kB7rIK_3Ey>ptzg$+lxsEK+)x7Ed@6`B zLJ7RYRy^EW2!=ZO?Lo-80h5*$P24?5`!4!6HoWHBF(`g~E$yPqV0BvE3j}ZQOg<1z zc&@W+{b5K^TFtr}oA&&2HwO+GpS*&)3!Cb8fv+@11S;+A^y7w=@`O={+xF<~8+BDB zj(*9i6+hro&{K&!4J8GQh4NWklzyuV%_6X@Z$cNS9Z{YHE}!>e6|<(p(Y{FVb*=Atns%hY$@AXU zi)ii`91&@uU?U6U+PySh{ZhT*ZEJf<(gAMRG5k9A+1SSO>vJF{&uD(EL~3zQFFLa> z7IGw`^^YH2N3%!0n0b)w1~tj_hm3Xl3WhXT#KHiWtgD%G^h1~Dk!#Suwb*zD1|N=P z>(-hi^+KYVO~^<2CmQuZ@zAUjyw~{Vm&3H{B?#lt(DO9d`Lgg-=IG1d4Fo}0awq)< zUVa1*zXk}o{_r~hQc9zsy-jOAsE~N|R@fle=5n?Dx^d}87a4SK&=LeDvhz?I>*!c> z1B%!EJiIxvf#*Z}vRMJ6Ckje5unENe)O5T=;G&h$Ej*Y(-8#@PqZ58~Nn1@jJ*Fzk zKzL5%==Q_MiODp^%<;v7yZL@boUq56!s8C>QC zE8}NfKR!44zBJW7FAXvU#y2iffYD5}12yiv(D?=oQ~G6-wP+YBKLPkw8$ylXs<-v1 zhbSFV&Our3gsGjaam!EI*D`4_7l1PPnmi4TV5K~`?>Xsg9ssz4MmSFEl74bdti#*i zDu5+!V<)FKU2Zek^wzCDdlrHdA~7nB8Jt2)N zE~ZQ5K%F=LgtvhJ^6=dXXF9XN`X8Iz+mnVsnkG{h+^p|O1yQlCYtB-!i$5xjfuNuV z!3pNuOXv_j<9+8?qIOspHUYny5Y~#cg9XsOQ>M*@>t50y5H#k$Nzj93o+cOVR-J9y zh+WzsU1ujCnznN!Z^);tZ0roo*g2jTXYAMaTzDz9FW9POrw2Y`%b($5A}O_$MXHn} zb<&Mb?gFv-$b;~7(f6m3)rIc`tV!<9g_ISI7J3-2>$>z)NZMGLoX#HJ{iZ@vvjSP| zbKQ)d^M4i(P_p!@t$xWFt(xOGWaUCIt+C|oJ!O&TW!TGrV+sk3g6n*To!&*0`l~XXOqx^7P z^+E|RwL>aRszCt!IK?q=~9b(-P6lXy&bV{GpA#A)_(CRn!29gTBG;n21|lWTU6tRRkHk 
z>*}P<^SOmkxCS~36JU8~(J#VJ*|GiT!xXv=J^XVuJJ+-C@l>UDsiLcE-ghi@T|&&y z4;tvt?z4h`|E=*@Mwg%K^*;|kty<2P>oQkRkw4TT(VRcV&;`nwUbZ9`tZFXNPwE>fKN zZML{jksVdFTl_&FonyqtuKDz_=5^^*Yc>~j0v*{J-e-SdlaXaIu$Jduh&LN}?U3=(LHFNIkQ1M#)HdUMx?T4N>~$NjNks=PbUX&JLG-))`d7QmnY zJ@@KBYVHcO)uujpaApiw7;9-YwiW_7e036YYPgrHI+wD$mp4i)cUhZbL@%b5*>|bNqm?G6W!MCO;kVptDtv6@CeBJl5%QWZ6+xfBf znMzuNS6MjPuNKmokvFNwK%v&{DDz%!VO*HcaUkFO{GmE#=XRp!aqa>j!J3++fC@9) z6Mj%`MJK>o-BT&arVk<68%^4D)Al3bIcZ0c5%ng!2|CC0U@?wYA$h{Cd9lM!o`HG@{Wye`@6E;$C#TpTJgpIZ&#woyOnm2Vu(&Nh zr_*haR_>J(z)wOx_Zr{Gj$dT|3en~z%F>_M(|U~Z6~vsqI_{y%1`805zkQ>r7V_k$ zF;k$z1MA&uVsdNy^Z4YPbDLQ@e&xwj@3W+G&y*J3 zVE#tC^cUx9Ifa#7r)>~7slSgHYU5{%LlC-aeYm8Sl2`KMo6&edC@mYj17BPJ-Di1_bdFj(!HEConm6}UY1p2aG6Z^@w5lsm z$nM+*{%VOd4c$Jyr?sbGHE|c7bIo?$R5&7GAkaIsm&{g%8zB-n2jQaM`8gLDD`9iP zUGw>U3!;i6-k0ub#OYhvQU*19wdw#r0a2Rb-u|5Dnbkyj1k&{u@_ayu+%S4b3&Eki=qCcn&e1S(h zcu%sj`i=74XlHTU=hd59FfP&Q+I&l7ihc6(nhWDz86gok_(g&l|yPZ+(5;6>2d!HbIfyLLu(8D_wpj~g`Nk@#T570Hk}}M2ymuF zf|-$~0MaO~^N)M@?C^*qZyFBMy+ps{jRb+*e|!pV7IeAk>mEU?x3#ZmWh(&i!rMH3 z$a6{`k2jiQ4jqY%Sa|DM`(S0gR+=PrWY}xiyY_KcmTA!Cp{JfP3sX_Nfe|58@m%P6 z)74mb1FCgNA0RO!Sft{9^a)%OS{Ro}o2%i+Xmb{w#9dA9vHmR9JvWZ(0rCRbn}hXR z+4=rek$bLYn!ei7n8GP#-!6vYzc>30k$Hh8C=Rd4(7>AnT^>!1+ooOa-M~!*%=03_ z5w{7<7cV`wB&=fU(=FOsa65v;L+27(>`N^ddlP{kS*w@=M!T9=R{j08ZNFq9UC+xR z(_dHf4fe*hP)+rdrv$?rCU0h@;eJ92%HLlIi6p&p#5Z1$&YNe%TO^4a0-6{-&KuHF zdS1-d0)Q_}ax(sykjAPbION!#cWKO)m3WVEmYO4Z~1S>0Ifgieb7)ay#<2n}m$P+c_Y-yoECl)4I)x)#cCqdGhoS4Iv=V!o))}e%v%x&*qv|9(p-iLj2`U1RV?GBH4L4X=`vQ^QILV(c)?MJzgPn1699tvrP54Ba4 zl22u2{c zD}8{DG*jR9a9eG}qOGy!q<*p-wZ*ffk!Nwn-)vqu6$n5zTKPs8z}eC#CpnV#{$+Lf zK&?X$HV{VP#Ng{Y`*a4V`ZTkm=lfbz;xA4KBy{BjRMD*plTrv zTwDdtF)M{gy9FtRRA$NoF#Lc;yg(nkGzB-WEg6)&O_cO58@CZJ%y})Td)1Dm`!*Uf zg7-=;mVzB7El0Gf4!*A86;emhu_?qzra+cioCVM@y~MUkMM%_%E5~m%^&d_N%LyJJThb2<1#qH>-J{ zB9?wRdlhGMg3VOHlm3L0-r(}IehjG)BsA4?#$Ml%YFC{Fh9I=gL%_-DXEd!|HX#AXI`Wyt~sp- z(yH*UcX)gy&Cj7GCe}ssw zyF6QuTzR+^&^5C!HW1eQ0mrCsxah`Xrw;-US?su!V7_FPk>HLc_f56GdI*`y{Er`l 
z=I_P)_dDZb?y}rPV>0(s`{^RTe4~tGfCcSSC~#b#B-4G(*y~f+>AqAaTW8yMT=!45 z{(h6{Hp{BXRJ@8!`P9}{Y~Q`CWK&4zrvOp1^5W{q!pbbSKm5=K7)5HXIx6GyL|N}s z;QM=tf7(T#$pMjw>N!x?kVl<=W1$E76T%&kaft?YLk{8N?deX1vN9_8jFerT5`$*Q zET(`Hps+|FePhsAzS}Q-H|k?cs4D66>==N$5Q|C5prCDAlnxn`2SgiWk+|& zlz`7;Yw(^7lo?yE9&Sm{dnA&?9G*bMOT zh|gWVSQaVz!W=lc@u^QKlk#QV*UM!Vzk$@r7=V20$2RC=9rj(MrL6ShEfNUD)KBf$ z+Y>VJtE9tTyF8t7F~G@wWF5DLr^GxeQt~?PBu;FAJC-G*8u{;D0z$Rfs?i5BY8zy_ zwV_B*D_#dhfAT zi^`j=od)Ft=Fmsk8X22|Wc(E|hqeqGdEwg2&Nmf_EoliZ-mo8S9TkeiRm~rAx(%(o z7->$oHggHIFI_ztoPDQ7QyF!KKD)SjeiiV*m08Q7NUXEQIL>l}v8kJBlCA7vub+Kt56b6S#r8VhnGl8;`ib5$rD>nT{aGoZBjbL9XAGUs6Yo7jg8Q$}uuOL|5^E4=2%~b3%B~ zqY0LZ(#TAB`0vI3C3EGi4`ze$p2e3k~Qylu+4Dd@V=at{n-kpzSJAV$|UE{ zhFV~^P8v^`Te4+E((h{~b0MAtk(p(GAe_~^u!?Eur7z)@qPI`z9Mm_E zWD3&f4!5;XvnzxFdH;-wbL*2*BZcC%R4ebF^d^|@q0>wiip%8`r=kmyhI zV1n_1{Nvr!vz+#?-A*-|q%2|L4s|g`QYy-VS&MoGgyuKv#cPe@aBqgY`|Cv}hYjMKVz=F1g|&5eo*yq(X84boCCI>Gbb$ML$ z;^m%!+5@U@pFk|aASrI?+l*rxSr^R2CUYpUX$4O`J>FkcpdIXc?|<0*08E%DX@kTW zF2)s0S8WlWbbJpal+-JVCt;FasXQDUPfqXiMS}?w@sl{%Lr5)ibCT>2+X6mKiaTWF zR-?v~>2Y1EX|xM90hF(5HlVDZX=XfSEy2wEGMxz=u!TqA&80Z>Z;UzO7}!17XW)%* zFv1sS{P2Ye`Dtk<&-0!!1ydI*qWZ37M=u5OOREZrV6)?(1xW+azp>}V%$7&RmPjz= z43k`JIl`gdHO#ZR-m*w~oF3!}1yCzF&hEY$&k6Vvnhr@t#x2Re6hN-ZY<882PkN-h z3Ci+7DyI1dQDDji9}Th6T7k$gi!iF)D*^=<8kz(ZKC^q-up(Q$^`W*ageaZFYAl{EXg%Ru)G;x-pufD_MvF+Nwtm1t&G^@9PB=VAC`Y+49G8X2FN}e-{sgB(&Zebg4M(qV=Bbdst zVl~I4%~AOQoN_PqaI=`E0na1v=+g!VZxBmWeJdlY1~Pq#m;L28R>G}B zK6>uN2>hHo=G^7ef_obzXn4WY&QH@6T07oY3jV(=L2n6%1F(x%*DT+-Y}Zp6wLMYB z-$Mlqkoh~v{E1<(l_V(`-jo~SVMt5C>I|zE>N)*1OQb{R7k&z zPWi87`oLWOqK4kH@$Cn^k*N>GI|;zq|0lu!eXj5qjSD1O{}l?~rxoj$Ahoo}cU*1x znH$TFF8N_;!G|F8&s2SfbX~BeMb6o{?1-v6By)XFq-FCbN#5Fb8OUF?NRSCi+2mgY z)Zd#21<}7Z?{CYK{oC?B#Yz5lo1coqf4j}U-R9qf<)f16--PAQsodXg^KZBLcdGDF z*X8d#@6W~jzuD&hIc(D%%IbgqY{Q?^dnzl(CId^J2FIDfnPO;Sajsc9vB+Xx7#^~K zzK=ow2k-x&;!u|iH42kHE(8`|&|C3BV zRoXiJxXU5ZY1q!d`+h?y63;^ekkCAmcU(pU_`ZqRN;a4&Z|KjK9LK--Pl`w{k3xN^ 
z=8qdxjD7A^YM^-euuz@nY(WkWhE98fGaFAjy~ZDP!4lEQUmHu%w8lc%D>O8P{_DGD z)xWNhd{X^&CR4$BJM@z!KlO|#{6^FDvfJp&Q`Q>5_Ob4q<4~If{BZ_2?KzYm@Awt* z$E60#zb@hIleUtVV~Z-%X6cF=2uh zE!}fLyv*M4R&Y9n<9yOlTzTrhmRGN(s;CikroG1-3*UARFV0Qz>z@>7>G4cCa)zok z5tD{}`VwzEr-nJBU9|GQHZ<)Za1tDD6=9_n<;=Ul43tHRF`hg^4;r&bEN}8)n*5IH z)5-kz2InQy%3KBpaJGU%^T!2djng&b=foWz@v|D|EqIYk=|>HKQRN?gx6 zSVzcajC}1|s?Yxi3;Ariz%MI@AxrXpMWX(z#C{i{!FwLm}U(dtT#lW%u4eu zD+87W8~!CrUlB@Isr$NK?yR&?`cZ>a6vk2Ah9_?lGQS5W5mmi(B(9kp*rOit8BWX$ zwf6d_G(?rWr3x28?}o3A6JeHGK`FoAfSO{U|DjhmAbKq6t_H272|sb>;j^5hJHppX znAK8ZNV zD?dpYXbFhkOo_*-IbU5#`%~BIv-u{fNFCf{%mz2NsQ0zOWo(kEkhZiE>-&a zi2>MRqNMUe^XfxS@zkAh7ct(QI64fzdWT>C$oNnj)ItYcy@G57BWUb*Qx%IErua#* zaJ-=q(199@!_7G^buiWTWc|zrjZS}ywbNu(2?5+twA5exSUu&`g~wYji55Ay#es^+ zr+H9cQl<{GG^32Y`MJNcQ+6dZUR=dFDOGV34r*cHW*AhvtPbdr-JU8vdvQQnz9kX87b)P5KlXCxoa!+tLz&$~-EwRv?Ec@o z!4xw6ACA^nXu9WkN6#P^xFqO1mcRrwP9x|9eskKKUyLc|1yi9SZJWK>P0IhCU1R*l zaj6rwXP|FeYPiXRwV_9AdZFOmf#kJ5wE7Qvc>lt*DI|2ax4`C13+}0piX_D^Lt@12 z6Q1V~xB)5?7<{(Xn=U%or^(J}Fk7MhG;{eeOe)86<1Qe+RE(f0*siJHLY0^Y8K zx`imQA?ZKt75*y^xU%K9qO7Ku3}Hx%TH;s5sLSYBqA!>3}%wGOFc4q2L$zf4uq4yC=YW!;L_TwclRflL-clev;J$KV)2{4z*#EK_gi8 zq?3G+_s3==-MMaE22qKWE>>>nYCqQJ45MqQt zLdbHz*NsbSO?%Fv_xpb5c|Q72B87XIYp$8OW`6UVFHPx}b-%gK(gdKyn>&oyza?@UX8h``7A6FDg4iGu;A|M~>- zVK@Dq>is(v*L5eHxZsdv-q5j2g0JoXxGey@90L=^eMDdWGsXG0GVh}* zvm26|eHvAnzru!$s?4a${O+Dctqj2PjXI`3ppgG%L^G-~qbl^-osg4(kGu{MBKdoCC# zpyk2|`ll)iTEr=MC2-dFi2KL;2DFM%(Xdh^^Z)T?2314B7Fbh{nfz0=0Pyt&8r%R+ zE&Ip13XuCuF!O%T>l)EN-9T_ZnRvL#epI(X{8QE8FVb$nI{!B- zS4r6-XV9oL$aR9t^tEpkkg~46GeHO4fZLg}m2kbu*FXf23poNNZkV9w2pysAAB6|y zHNYo-GSv+(Zi(Yw5w&14ymaMvwFp^-RA8}=%G{66F^z#i9QHD9M#BW58$#Az^j3+I zL?rSDqjVV+lq67pXSHTa%;V@aMyeUFH%_xY_x2c(&@Y+hy;CK9xeY@8s>CcjVL_#Z znKqv?-!ul&E`yu2YQkGeS;w;a55Ic%pn-R`B%^j(NSOvy5sXJ8Wnuen0G{N~swgzp zl~W9A>qnbgZb|KHm(l@;aUo-6b+d|RTyFAnqv*pePnaD(w_TBva=8-4e_MAS+MG7W z)R2yENL$Z+sn1(?f4c@~4J504YqJ56OYghLvRhi*iJ9xi2|pf>@kwkGT|-Jw6&IM= z%I2i$syk0pdK8X}pOPghPVweNCS6DoeH%qj@EwM!uwp?Ew|O|(!qc&8Q9452Ht6kS 
znrHWi1rjvFW>zi#t%e;)LlN>R!@3 zr3+qrKj9v23}Z57k$lD5cH0vjk=i{$Qj2u{G7sRWJbp+2SfuyXV$*lBk10FU96aPEs-fpPE{I4l!2P9X6mjwMvW9XU_%+QvNrEiBD*X;$ix@D0;ECf=)s~Kj zwp65cZMdqb+ksT?L#{_P0nxW|N-<3q-0nj?)~x&8;h4?kfN1oLcSBFmQvPbx&j_04 z?SK*?6PRScX03gHz3{LFXVd*i;CF^Qe@Pz7rZE)L7-B2v6Sp|EFUBMu;$=C@v6OT= z1W6b$o@AK*?0W%jxMZrl78GGm8M>Z;gv23YI>2IJ#4(tAJL>fRR8*Dbvt&HDeV!#N zBsKwH4va(@=kC~U*o_(D$-)7GM+iws(1T;TKjbhMTLGp!L(sw==5=pU4+s52+1dk9 zM7w_H+>hP$tD!U0;+(?9MjuaJg1k*B1NUz7Xib;#pg!pJ(|Na>J-*W< zrbU3*VW~B7nnyjY&neP7d>Y+Y@GVer?3y?e&J)L>gtn{ZG1v%>)Q@*l?&Ef1rw>85 zR#ihHzppy+tHuxq$THLyAfhkI-}f-kM{WRF8&va!aBJx@Ocm(t$4Hrw8^`lfK*o;` z0oNdisA7TuH1nt>E_1+I3zO>a$L^`UBl0YnljZN}-g2T;G{|3V&n+ZD9)PQ0h))-tntLX0?S(HQ+bXPN@D_|T@*_ex zupBT0!{!sop+RO+Ge}C@2~a^$E*;|3G%ZZ8{IPN81nxuWmZD7T;H`a@29Tic_If-1 zo(UcS)*f~`M01a5{K1lxILqUH^!0vG@}73)wLkXY|I7RZd?t*t+gjcSqE6{-cC;2$ z@k$h5LieDWJoNRoxKgU_#(su1EJ}8^LCUwQC{Yv-$g(_f1^KS@j2Et7vH);ZI?E0` z>8PyBLg?+l?XWl*8;t>s8Me4#u0=MWDTTQfll40#h1IT=fIIN zGL==hy=joqFVxO!0ASgHdwd~by6rzq>)qS=(+s19dX_yOsRfiz7?KVc=J54{-YXG)LD1Hh5?{MZQ9-d9lo~oGN)J+a1)BGr zWCgZ0{q|WamlnH**DKzL^L9Z69Va1mq=Cb5?xY-7j=@0uDMj&GJN`se(NXQh@*2tA z2K|-{fFbf&RAwdXD7Ql@QWJp)9IT>MO;1 zNafq$a5HRdYOw>xVLhaj-Y~%rslyPG3C2wlwXz>Qf(?<3_#{rRU??aUGY0tJO``Xo zLMlsJaxM&Tcnzg*JR}51UjLwviGnHw^#IOp%&31pw78z)l*4`lLx9IM-j^3W{0F5$ zxSj&1U}2S5s=aN!sJG25#}_n#w33hXg(9_0K&%_H>vB&tk43jMq@c{*BopQ`dAezl9!x+SGq@ z5h68lazh4=MF=bc&X6AJa9*0!& z*d$;A7Z7bLFA97C@4q9+NdW`{VnA_rridVb8^XrhKSaeu+yKXKGqcG^JtSVDW}&1*xPsL6;Khe$ zL;%wN?eA%bDJw|S0Y|UALCnnn(-y3bn<2RL>-9mX0wOFw#G({%;DJ5QmdZr)?F=q^F^`I{#>Se-$Dy!o;Na{j=*UmF#@ogEE$M+z0q5~o3 z?qK(6$2LFN`R`s*2Gs|WRGET@Py4*jI=4)me*4m6myMdVjX&%;KT#*l`MUiND>HuH zt5)xrrDcD2;o2fC-Aha6RUEljx$qU;WY*Hzd#;Unvu50?%@gf+jiI?s+JF8VS8tuoXYF^LSgF22)A^g^gt3(1 zrHi8Sr_LR(>Fm~`*!sPGVI6-3fAWmbihx&=$py~Fhh+mPtDZl5F=0%U(h0RS?{AKK zZ)Z4W)1kk8rjVpZ2^S=9LygI6!p96hPx$`O8i!6SYTC?iw6={oGH2}UMdQ&c#<0@I zgU3tPpSx%idc}q)CGcWrt;)yPm_{bOaGbDT;o?bUO8VpArANb3*tCoFP3UAkpqXZX}znB2t^XTp;p<^B< 
ztbQC!on=+Nxoou{`=!yi2@Vw1wU}X%5%F5vRy|%d_8GY4`ERoCjR~s#_SuX%Rh`b~ zzsb2b#%}8gAZnXyW1JPXKbi`}psX{|s=VgK)aWApf_UreW+#SU&TQ3I&T2k6{LGYz zvzCqlxBMpJq>AzhUITd7Qw{T{N8C_RPAHjNK6UPw4K{mB zXO2|z2gU)RWvohI3KwsjnF55?{s64>9#e=^qr6v$~cW!2d7 zsnPqC!Ab+AeVCAZMp-Aqdt}N5R_06P+H*Hp(BJRFS8lgd^4CD_~*HlC_;_R!)xbu_Ak| z>v7}+V`Jku!oYdd>803L$i2fIQ(tdyB9VmV>S0X{Q}Htz+*k|y_9CR5TKh}`gMy5t z-CV8*t$)+E=W<2386qu&Hq>Mwb?PMITx+RmY+pV?ChSS~)P~&L-20n+ z+w&RvOaoW50^J_WIBve;j+y?`5L$spPlKs|M+8kMt8Z&_Acz9Pu=P<4i~025fD&0C zq(u9UDR7ILn(P7&>FQcqT8g^Eq`KK#X}jB{NQp`?wL#`xW-nPtk!sw@IgdKI6q}3C z7IPyfEXbtOF;|M;y?fVp2h}Gj^zy93M3RxRs^gR?x~c1`EgGAfJ5@H*f)O%4u^^5y zTZN1@9*v3qzlw?Se^RELJ9jPy-6{~QC!Q`cM96%lY*paCd$-af6}^Ob3Aw9oP~d{+ zG)^TiBwj`As&F(zh9qg~vV|6+g9zHB-rm(qQc->H-(TX*5ba0s!ZMl3oixk&-=33< zi)iz??pyi$-Xxe~dHICqRxbIPs>G=w%;WH9a+ z%FYE{C_~5!uUp|VkRNGGoqGo zo-K${g?5X68H z*+&=nC%dID6ZK|Thx0J03MVCcPx(l>ZxHE3 zFl2efTZ(_+K}xY^RHvgdq37;2b+MVS8MyZG%pa7pBM{mxwf3e=XhSiZR0dx2Mi``z z#m^IOtP@$DWtu$gDUunoY)zRa+qn{~nIl36r*!M`upUflNX5oLF2A_>1(V|@<~3#% zQe7K*NfBuLLCp*klHKv5*E}G=C`XH@gT)fOH=X9bu#aY_4e-9xO9FRDd8$F zP%sB5?|!!!uGlq^JYU}=qtd#}uc;0pW!lp-$&0XBl=kQpUcn>DKd0dBb-UZazKiMa zLz6wI_+3n^ogAb>H?XcNUt&un zZOYfuW%FdU#09qo2E`a@y3v_|`*3pP;8C67v>wER1}z<)szUG41a->;USH&42Ejbr zWTW5#5eJe%pHSqGO)PSDT8i1K;u%O5hZ(ppkz*H4neYIlj=YQs@}*ClLj z`oO(->+-GF-Q7;a)u|M%++%9EiNI8Ze&u>sSY!(s*?NUUrtbc))m|cEK<=S6Z9Ke? z4hfeJC%)8{f}|Ws(Y<%VK(+A0zRAKecJ7(~MHW%kVo6vVz8^1ZV#s{9)vANP`9AYI zkYg%UQzi6S_I+Q&#!7og;eUKqfkeIhk!Vw9bD5Q?A*QS=5cnC^?O>`jw!DpXZ0Kb# z+5q37uL6;ZGWa357hk&=Q3zd23hmGvvZQAf@7zOz({!JoHj*yU{mZAV`1rKEM9BNk zPdmi)pUUAAr4nd%<(0wGjoYBkjo@hG!PyvBWL@hN;|8UnNolwkV-u5j+8~H3&|KQ! 
zB5`w_)l#t*lSGFyJmNCBxJH>r=r3>Pd>w8uAjvuUUugt=NK99)92hhs_RjO48shNB zr0+8fVW`l50*9|l6ru`zjA1Q`(0uMVp?rQTNz&)X2SUckGDu-vKkd`W19^|QpDZ-W zYLjbPFDg@{;cY?3kL%%kOIA389uS5squo4{#<39Ix~+Z z7K`rqNJZ0GjE;xg(=wu}*Hf+PX{l7HTOq2?X5I9Huh;V5NmGB7+X3MM8>8*6PbD4n zUyKd70B&j4cPAzWn8Tp1CC!5w?(Qs%tS&hUBndAA`zvP0K~4{m3RZxGJvub%5)-&t zj!q^dca|EnmL6u=)}PN0cN9}Iz8@4ow}ZL@A0|{sBamf=kJr-(m)lnydXOefu6=M= z?Ajo!tPu2QI+F2>crGtM7MqORt%~lq`?O0m2-<};9D}}tS)Q!5hB9{0z)bjPf5x&s z3A30kxDeEJ>>ZD1$yQ>j zy3gr&@|$$>{hR_@!sf%Y0ryBPR*HBb1!&h*`au_j-dO8rQ^PeT{hC-i5?`Cb5OB*f z$_vh2%_$P_H!z5M8U_!gPya>z{W^qh2P>5}s()bCqwMAB1JBks4=9%L>U*#D)kkn6 zUo{WTv@n}bk}8qDLGFIqc8TVfFgtwRucS)+{4XFB?jczkLgg(-tYifaZ8Jp`2%Q%G zS5|a>yV^TQ9Po=4DgDbrUl>%#o}Fw4s#l;Mr>yg_5DF)4L6PZCH78KW8m|8hmq~s7 zRaI3zX>5=S!77W+jvxj-GvzF(K7o?fhe{y0xmEc}2FP|!b=K7dB@EGnBHan7{Sxsk9hO>*OFid)f>mEUyMzEJ(On}Rt zpqBcfN;q{csLV#OK@vzo2cxi^uo$sRf}Dy1B_<}f)g5g3dsOAqBy@!zjl<V*wy>8`Oj-X zgAo}JMcB#Nr7>V*&rjAy zFQX+8eLq$`qDGM7lzY(`@$vD$EDO8<9=S~z+jUR0*SYTsQi3=QPM4Ho8he*v;6+fg z31SIB85KLzAA`^WmaH`y$K|42d;Qo~m>q4|uU9})Fr0NGH8m^RS*G+aFC(jRDx zk#pZp(CWj(wf~r6p5*n!n4*!S^W}vw9iTXGd7-DX^N3!160WVKMUBokCx#^=cLV>R zplug=W{SF*MLbbnAjfUdJcKktv1PbgDwG(NN-f_T^(kQTcDCn@AP07Bzca{z0Tkl! 
zS;xk7yt4v#QypYfIKoVUfMk>nvzD>PUU}R#V3JPId_wbQIF62G= z_WZl;X{$|_HvGun>m z-F6d%CMG5ZMHVd?0U}UdaC%`3LhGIr4!gD^^`)i5=g^~$2pQexLIB3QPlSCNT6zh* ze5LMu{rTU5({siMs&Mjrk-)?ghqD|=9nJxt+hZE(Ca2EvuaevHFZ|2OwBV(+r3k4i z{`Niq>GSi8LQ7`=nDcbRBrG`f;Q*Or_%NY(9YTRxAPOWox}vy6WITWg?HiAfX%wd5 zuHkh6j89~{rMVa($u*bzw*mlDS4Wa^H}Z3BoIe^%mMl@B#8vyChOjuK}ZRcSqog5Zcy# zxAB4tu)o~hJ`bVw$ua__XL*8XxpSZT2x><1&nv)CE>CD`1V#waCk4Q+a zMXQ`Lb8<>?$K!gBSOhf|<}ZNSE3sa%GrfV7#ft~66+4+-n*QbyoOx?a8o1wxrlWi7 zRc&cJ<#`qHASm(~ubar~_jV27%d-pXlo@8Z}_(?AD zF>D{P(|D8f?=B)F)0K?-Tpy)A4g2Y~Q5ShpA%;p+HL z!tOL>5+^jcVRXGAW2RYLTphthQq7!nBMy2)L33UuBXkDY-u%2)=P8vH2qO|c!|G%g z0j!Qpj-$yWo7$*~KLn=46&yw;`?8ex#tncv6zk`saO>+H95xeR4EQ<96mDfj*e>bh zvYE(Sdg^*q4VeKmMDe0xu-x@~-l#?4Ir^I%- z8$+Rwq?Sn%cS}w~5nUX(b$rh+p#P^W;vL768U7}_^DGMz%5ybDAzj7+-LEXDi`=pN zp(WgrI3p7D&VsJhK_XO0oC|PK=j7s|WFlOL5$B&jA! zi?2P(!_7K4q^ol9;?xGgOf@X*q*y{@v7&ik3jSC}kEV}VU$y1F$HBg5B~+A5naREx zH&}t{>17F~%khpx86PtbVEY1DQXYhVIo!T_z8C=fKrf`Eo&gP9dsK)Sd{@i=epcZZ z9jU~@NY*R$JtU$q4YQzfVEAu7HLNOPL>e#xjZ6?Q3VoIk(q>fBGiOZORbD1!gc-qY zD!?D0p~o-Mr-~oc6Q1FIHf5?d_3{8%ZcrtT))yNBS++Ap`MPrQACi3RRT;EwzBG~5 z1n`4j%D5OSi&`U`cR5P%x_s*MR3*rT%mL_* zQZYoxTR+1rM|#nAWvwLA z04)#dM}ppGfSz9>lt{mt_}FB)>3AE^5YWN;#u~8G!%)Asa#56)kRA+7?BvOEuRSt@ z|K?Mh@t_7>(gZM>qHu_t=%w?yGS&(Axio2qhBQ)^bXU&UL|8E z$hPlJ6H8CEO>n?PbG@W_BtSAbCWzbD!?9uzFSmm`MsVkC_481+fCkmY^(eTTjj@~4 zY;VP^vvCxg1xT2d*!jdFw`!f)$QX4>N!q%bVQ>vQiN&u6n}Vq#JrI#dltfhY0_*gk zgR3ul6vTR`URw4T4-GnG6zVOidwGNn%}HAOpEn)JkC)k)f}S+suG+G~+RZtSAxhfn zH|qyQu1%su-8$%)=(TWa8_<8J3i+CZx%_fCSM69qt{jHw=peBL=Vguaqh2omhA=!X z;f$G!OZ<9FCVO!1N!#eRCiSRv37s6ZV*OjS}*W7pTjXzgJ`Iu zwCRFa33UN3)-Ny{cRy>voQoMknV||&8Bev_@A@xr6F0=`?KGW-TI<_swrzcy@v{-4 z>*NXc+^Uy{#0+6TGH4?O9a^=5oqCSi2t`Gx{6SH;2532^1+R~G4U|mQ5zR&F(mymD z;yForEJoC@F3(zt_t;CcD15jvbOe}bxj|D4oLSr_&_}$$002LBX-=?hkJ-EuAsyd7 z0ikTpf8T|;2OC)|mJz7Sv_~PA2%Gl)pvWD@34%W87=&{1Vw=}%0>{H;R^vq|gw8?S zq$fE!+vzIfYc@%s0#;9KE} zFggM^lqceSpE((!xCjj&4dPO*O(1thW#vvo6PW(Xz6E)5nFbfL6G1T>z!I_e 
zI3a;vREv~uYy{=6oy)Q^i=)c=fXsOUC$E=<2*qmRjjxvm=32Hi7U7OFa7v;z2-#`u z3n1bKKpOxj_`2D3CVJdK6rhR-mve*}QR!|8r~_&uWw4RGSdgOv7lvT0MkuLAn*b^` z9sm0;b{en1Z38x|l%;_fd^oNIVOi>zX8&K#dZh{U#w5JnjLgk)NCx z$Af}i>Eao=xtj>w&Q4Gr#wsHQZ&qvvV8}D6QO7|QSBPiGa$kSI3k#8oQI~y@CucRb zn=j%a75b@V5At;_G5+33cXRdtehIBIHs=1$cBBfjCZ{hKnZQ+SGNBbNMWyK}4x@NR z!q6qRyT%Fn@$hhrEsO<)gnHTmpW4mHrLh~En&J!&nE~*9Uo}tUuHYf&7V=(2ZAJ{( z)fWrRP6KJh;g;hgi?QD!2E{6g9I7iVZ8=gDFOB=FJa5tGn| ztm@jpk6BgO_ZG-yKJ%^CF}?NX3zR7pyO?ce3B`yd7&d?$0H)2!@7L@oghXA~*Xnd5 zu26P8$QYsQjUpRlZ#q&Luq{Qf35vD=7|*U~<2K+DS&NTOL1jLU50;pmU~( zw}=&skhXN_4=~ol$?|E7Sid55%^wGP=74@)mKuT$vH*vTZ37uT)=z4u@RqcItDvcw z*9`aNVQD&r7*W6V<)CFv2zi=KPXKwPa(r1N&pwu&8;|?V7_<07x;e_5}tqXRyVIQS66l8+_Qe05goM=>5)iD(q55!hRsHxdE?k&#~ab z-2(59)pa&NgB5@<_!fD+$t5kUG%qAQo4q}iTj-`L>cIv`R65NSkV=BB6F;bNd)Sw| zD1&stf4(ckUyXG;g{R)7LTA~EEyLt+vgX>UL5W!7z6SbKq+sl3ZE1EC4s<_4bdK;K z1Wyg7;^)=_;yceySzy5+6MgYPJ&gL0f=<5V!71J&qC%CIoi1coMFf-$J|Vk-T1c!c zF7aKhlcuxnyq+Iu{ojd6J=b9-9Zr00tp%1SdqO3?4&myoN62uy9T4k~jYa7k2Z(H} zTxJ4uv<+}2iCLgdtuDr}0j!M;y;y;ey|msub*Ew9v|7c`idkdE(D(hg*>qQ-V6P)n z@re-@hPuW36$yHvSg9W^1MpH!V?f7tA#`hu3A6B&u?EY_5j0nIvNFUf?~)uCfHH}7 zRKgH-oA=PYS*lE#B)Q-BBW+xbwIOuDZ6y4a@{R(Sxy(YJoC4a;W4qnsGF3DWngPZz z0X={QD!kbgJk$LX0#0S#Q4hp663HVzuE-CIVonOZc(xIg=>=I~Mo_gWjaTn<%_H3N z6I)#U+@tI)Pj`f5%{mX7fp((1jcQrZFERb~n1Oj80dP_WDm%Nn$^fZ)l$N-=dX80> zxrYLo-SHT=6L9&ccFA($YZH{GE3#Cz#4}h&v$BD$!);lL9h(OIJ8M z72(7!*p%wd|7YZIpYeG{l6DfzWJe#W$Dsbmw+1uJ9!sn(&RCfbTE@CiP^%MF`WD{) z*=<;#p_kq(Wta#YJ4&uV)3BEb+ul_Whnltn?B6i5ZZ#NF0fttbAjw);VO;@|Q#{#@ z_!q5TOm}!6PUwq)8P>p@S}gb_m~!v))Qxi_nJ)|^j7cP}zN^_AbLN_?^bBC{wz085 z_X0PFv$>dj0FZYMn+~X3bnY?lVou$$zz8mbMSR>?qtGZTNS@+Xin-5U^!0`!K_W$> z;Z{yB7zykK;D0;esUgGn2NNv_145!P(1CDSt2`a(59d#=VdG@)Fdu0GKR+|EuY^?! 
znDL-N)=v=DUuC*i%G3sC(k|-~zCrC9^%c&AtSjVr$4|gE8349%YR0mVs&fvbkOZWn zbRBV6ZB||UGq{gUDER&jUJJD~lKj`ItPPhkYS)`bo9en(ib9ESbe_9S=C| z+5nr3^Vh?Pl&*_v(umeH2cfAm6VsLk$@UgA*!(505|96^QhYkzqQaGEbj96B98vhC~5Acu?7D1vY z`VGMcCzIi^G#$bZ^$Dby;|{DGTt4C3i!VlE&E@H-*o^Tbx%c8`x23>(3tgJ=}4Pdh*?aGEeb?P(eRU1x=0IBf|p`P zLW3 zM^Q@Pv1TjkH9*bSKRMX<5NjjMGd;4YQS#1%!z`@j(6Ys)Fr5%60|W4iwI8VB zBh{hd>ks1%6jEymzjfB^2<&xuO~np?Xh;Pi#L6(YN8-1D)f~fi=iFN`hxHhmOi{sX z-rlZ_lnyauW*)RbeJ6N2PzO(+;D|Lb^pgBUIE7e9i=PabNxCAM8i$(+4M9KuT#;1- zVyK=O0+P3&bk0xAq; zgcfs@>h*sj)hlAF)?CA@ZnW*~G1Lj6kCM zdV_&+@T_6d*HyoNIRRZBsBRLV2Z`BOZw?d0i)l{K0T&p~`2az_Pqx?zE(M1HFlR>; z&J&!5k@Mm95>d?Wd=SK0Y(A-KdEZuqvc;&4KOG{$WJ%T-FSv}oHx{f1g4w{5G-z=e zOe;AM2zXq=+*zxhx-G&XR6+tl=DT+m3sTjG*>iHUTz)bmID2WDP#m~v9yV6l;O?fLruf&zq_7*&Kk2*CJWQ`QmnBDCG@;*$^g z4o~DBgrUx7+lWO-9&6lD4uXT{dV&a6FzKoYn1IO2hIImQ5NvQKn1yE@21&~sZ#d!94kQUC999n1XSJ* z&t~3o_lsA7PKUcSeV~L3DjV4}RC&hNEM<4803<*_=VAWbnhQzN$&?*e0?VABj>6~s zWb!3m;2AdEn119cH-(<0k+BjXy;W?l@*f)ah`My(Ao-oXlZ~^Jjb9+(g*%=+Mkq;) zeg~R~F?i+`BDGNgF&qo7$DNzmAgdRoX@!TbtQ|ZAPQP^>UX-9hje^@?QSBu&<8_O{ ze6!jB91|PxY7uq?LWyTVOOSwV;hA02`1CsqJh|2c=U!FPSB*+Vx98N&W)gGxE1J7F zw|=A=Hv>E+AQ6U-{K>b@B4i~6+&Ysyj((_bl`Qd)3HlP(&=uE}a#7KSNpRU!YLWj$ zosH4srU%cxf{aIx-WLjzE!}|&{K33Z-S>TWqUr+N+bYYb7>Pd~44!3UoV>*+D4Njm z%y5Jxyi}zPG?$3gV%pkDCyFtLpq3-N1y+>Vhtk5kDk#E39^m+R#e9wmyLA^{6roHs|r9Z z1`3H?$l~at4ruhKge3oq?wp-d;HgVU=JGN z?_5GgZ5nD*(MHBj{deNipq49)VqH7kTLfK`J3V=V6wrY~7+h;Wiu^Ex)hy40m`@}7 z4!w*>{>G2SL5g-zusiz$<`=poEj*i3#78;#PtO*d^w6+Riu%JW*4;z)?k@(A>QFmc z7L_6hWo$Hrik-|Jr3Lx6N)l-uG4Wpa0eO}bb?gxIkV<4aVS?V7wWH)sWn$AIDY?Me zSSGV1Y6Ezjt2Y=&at%xlJ*lGnj3`1uF57C`P=Hac{VNW%K{ovg%z-At5&z7ACQJK7 zGe@S)`yIx%1?;oTOB(UINrK&J+)bH%ENn~Kz(co%P)%*Iko#o60$>&Oz(_Q=w(xR@ zo%#AQCQZ&bYc_)@bAkbayevpcx193@pbaAy4UY_+#F7Sl1!z+wegw1;1c2d86$Zf# z4|TnvqWq>s16mB@1FIP_(Pw77)< z&xa&px+LnWM^2>2fr~+%UD)5p-V!8J8)!SzkHy35floz=UDCCf2y*_erg95QMA=&cwQIGcytU+k|5z5piJ3WtD;hD}(7r z7bqU^c`*@(PV#V1PQ^QVh?VZODf%ZzBgGH);m*ykHA*rF!BX{wLWNYAYYyiL_kjZU 
z7gg7HC+*|h*7YVHo|?HM9oj0dR%TuYwHIdST<=RJ)e(PD3CEvw-&GDz48UTrFPJp0 zb`wo{E~x`CM4k9M+TGx*Xw!O70V%M2)|gRg&uQUmI^w#zJ%~+%Sz!oq+tE&=!N6Z= z%wtXVyO|<1It6{*nB)Tip3l#NTEi@5{c&w=H5Tr4g9aC7_*6&Ye5Be>%-_NUk1g&& zl)4DX3t{%2_YJNGGz7#V8J`grL`6~YkXyJ{gfAyN0sk2WfCYI zPX4_P#TGq*?&RJ_W&wA9yvnP?&uFMI4=ll^AjQv&zL6H}+pevU+ItF3@|G#;@5C}; zE<}u$M^I`*LHBaRj%QIn+nO4t;IGy#q)XQK1^Q;+3GzK8X7WJO)19=?0;Z^D?pcK5 zh^WWp1pNHzsG0$Vkq=~*yE5-RRsblH5V?lZsQn7tA0gF6rg9V<-J|Qy!5xD1ZHNtN zLM+QadZGVc7kNgPe{}iRAV7ILsz;-GG^$4*24RDl$nx-Eqg!Nji;QlO(Jhi(_$qvV zXpKWBc0_H5qh@5(jEtI*Q8O}XMmCHFTBBa%?5w3@o`#N@9sw#%3CcPrtjb3nt&?=2F2b+*=xWr8+88!Qtx+VCkvi4 zNrx`s=k*kYpvH`{s4ps^o4QHxtl;ID8{zcJw-AzCz!AbdY-Tw6U@&;?8UC5 zZe^lR=YZ)ymhLD!-$PK`FmULuuZ(;Pp^3baAt|_Uz_uAQFjMT?(2HRwp;MqTe4x8H zw~&;5g()CqQ3tBah^=$4A*7Z2Lb<03!$RZhX>q<^v0ouN|MBP?L3K*ScZR8Wt@;Rn z?M;FqgMvASZSy?<@fGyC(mHHDB&f75^J}U@s8^_c4H#-e`V|Xif^OjFZGkIt(-?IP zhnW}$Xo~9@r0z1VhbZXahi@=E`VofH4yKyZAB&x`2XRlnleNg3d?ORHAlJ~EHXhVp z+xLA<;(D`9hdvKajDmVe1wO{I7811eD^N=~QDzIXG?5^|R|pY|IG+deNnxekS*z_X zu(a~}aVt+NQ#h@=BnG_)b9;i7DQnv^r0>afL?dQ|PB#PR1pGBU|}*iZlr6asU_{HKOE{7w_KQvDf5QjF^a^C;=!deYY{Qj$n`3h$2vGz^Ybhgd|Lmj&9(D1xgRssNjuLYc8{*n+2tbbWPq|wKf^vM$J%+&k^%-=brhSK67sQ%o!`Iuq3R7$0ACxvJ&u1FaXLFp14^a zGWGrYN>&-T7?IzPF8Dlv(yIK?8L3lfoJ%OoEE)3$Kn-cUW_ zHln7K*kPx$83^<_jY#DYjM*hCoXPmR4f{WmTm`r@&)q=~QL>$ONy?~^^ql+E81#ME zVubaFf5*-w6<)-&r@PJ#MaWhKO%dO$n@7CoNE7!kI7X>BYKOT~J&hV^17HH_VQ~LxA^jNPjrkA<|N*c&3Qd<9`rpaYb%0V>`ab77He?>OTI&ZUyCH z#qq|rSnl9FK5`^lhO?z<74Hlfy)k^INd#GTNz$Ovcc*q3K+PT||Na1tF?*78r0_9@Gz$T%iYrXr$b9^UGzI@K-_HDk)pQ3@ zk)^pmd~I4slwfU72GDz-C(Q*Rq+n_UuA%p8Uws5O@>Llmns=*LP><>@MhYMulCt|{ zA96HS|BLyZM_=s=0?BfDv~!~}8kNy?vlGL$C9_pqIji~P@H0~;f&|%6iyNGg05W&UU_;VQR`SW^kS^zwftAD zdYVlOW;8x|f8u8FTApEBcA?+*6LxIA6fx(l(wWuqWqPaq&2}Gl+HGZJO=x{Ocywpn zmFsp&q_DR%OMKVa4NV592i@OqetUMkMRl*)hYu3z?1gXUE*-ZgbnNMC&%@?E3*>?? 
z2kPdZxwgZ%>O|`7@jvBm-s*B~^0JWC38#mD_Bwdh&o<=?f6gd7_rO3w>FJG&JC8{| zey|e{EIM`e!kAeOW7G~mzBTc^eIWQUd%c<3;mZrwC1@3#c)coO2e{wMgWoLs@H0w+ z!sS^yldF>XHy&7+p7wNZ+qHoB;R6!6Tq{aAu;@ho><#DV^xPYpU8@SdoGsmPevYHr zboh~UwjH{B_QLRUe)yTT^VmBI7wrf#i71ivvMs>p6pux(Ejdh(4(%opC`Z{{JG1HZ z+D#QtkD2VMUVgrOx<~ATw(t)h7|isUb9A;%cszUjrY*M3*6pu z^+a&9(ATS!exBv9Vao8&?kH#{P!hc8fY$0h*)26ifm;}k8VFVx$X1Aos@1T{0p!RU ztSSvZdqlyfN4&3vF|nq8$WzpXGlpkN(~hL5q(RP4UtgX|sae?iqH zYCGGt+4vERhg!NxCD}cT-H7&Sdxq5GxskM-oa#;E-`0qJ-krE`qb|kkK^W%42P6Tj zRK7~-*}-oX=6&N^HKy535q!z*T$UG_dTZk4{MkRxUAhM7*@nHLW0!pR*(}P5*TDtT zpQ@&U`3ItiM|XOD5xZT>uMSHQO^Nc03G<6P#N{0&K(ZXwp<7+`2k)C{|AMGOdX^Iu z_pVMq?}0OoA!YXat!1-)h&pM;)#=gc*wa*5NafYJRlgkL1l@bEbH1i>vxQYH6Li%odb*D?eN|3&wiR_v%$r* zZQ@R|+Uz*Dz-bNH_+-_mccoVt?=s@9SQ;##yJNvSvsJh%x9zwVB{j%y7HUR9IwrMV z8jxn;wobE?Xuz+TMG-J7e!U@^z`kwE-n2U|cyd#D!F>bS#}9txstD8E&0|7VpO~?> zZpZlJWMlBqA)ot*?#Psqf?1S{iOT%7>?VigbsjRO4Iwp+>mj$8WUPl&_mKhzQB7Sr zFR;XB{N9x2;PnvRh3}c>n=STmib{6jN9in>gRHIet*;BNA!zUI<B;p^OFN~ml@YCgSJEK0nR7K6*_AU&Q%JsNxzCyObl;i<*P)5&V-eMUhbWDV1ZBRh zGLOd_xKJO`CELR`7ZFJ}<-Ud332G(zY?_xS>%__~8cbvBx`%Cib?+MRTfmFGfri+S zt?pB=ZXA?LyjAw1c*+-6yox1FB4$}U7fAv9hn!V$ap!7a;y>H@0g_~Zj&*90$fAgS z!lxC}f3cjFB48PvpGC3mmCT|j52RuUzX}2?mS!~bTVA>a27AUS?mjkk5&S^igX$kX z7(A4$Isvor>X0HNBhg78+~BTl;5ql3O@A+xb<^K%)wPf&(;;&8UOoLU{p1jn*o&)P zu1AO7p`lc(W0|4r25BKt!~-u!hdPN8M%EXSO!??5Z&z3T>d)Hawp6?;3zG1T z0!Ola64S{&?g&{DWq|sN^;dyd7!HVJq@{GpykoPL_%dvey(1G7;q?cz3yJV5Ppd@M z_OGWVC=cAmNV}LBxPkI@T%6;qf$|5w1V3g?lmIp`Z#M-#|G)5#f>{*c*}Eux3Bb|C z31l*RMeP`(AmFX8wsWo(g8l~hZzv#5gnbno!}92g&8#8&iteW;Ot<}Nzgp&Qo(No) zR>7Fz9r`9m0KR-yWe}SHpM48Nhj761LA_l}r!CWV?n%v2;MS6{1KElLZ&B53ld4T6 zG`x6yoJcAb1~}g}X68IVVF=p_Gbm5utmSUPk%rB__07Rn6zbz*i7>L1w=<{#Ra1r? 
zN{dgfe(3hI8~KKq>wExF<@7pLQ=6VI76<0I-<5W~Qnr{--w00ozrS(nbS$`In%h^*ynukatWD@fGDezs5>ZVw zO&As32v4h{q8sr+e^hj%qWd>87o)ar#7Un~(T$4k-^nD61}7sg=xJi~G%XQJzg6Et zt%MGCI%@0{^P}@$W8b@zdb!+W)w!FHeZfSj>8n=P$~wvEr0K49;BHy5EIN^VPUajy z-P66pyc*G3uf}y~D&-C)P9NhW^IOa8eN_EWx`4>#iyb(G@B^ zJvH9bq#29TjJ;IZ&(kWf)BjHePPlbsOc~>rg~{|l6x~*z-4B1 z?e{x(r$*<~_4Y5bDKB&CmaYJ(2`fdrO4+`{I~29uH2j3_c$X&936HoV9P-#tzg=`! zfg1$thk*P?@W?gjaQZNM1UJDHz=3}Dub$Kgn4H8{SU7N5>ElR@Yi)m64 z;uvHs9bD{3;~}$wOvQonU>u9KJrV4)9MxzShHsM#h9J2XMr>eB{B-FFKmb1_uXSGo zDLe96M5~7x=hGvjWOtq1)^X-mHB5U;Fr548cUOI{?d()73+VjS68h<|F>V+uA>QJg zjOJE~$A9{*dhi@|0V)mEn1}j8nam_Evsd@dv8x2H->>tnwsWGj;fup29wwR%xHSOc zMLuVBjO)pwWYWN#{dea-v}6p2LODIKX7?yIEon+Kf1332+`t7zvqt`w zDivMzyv%_>TG-Ig;dZ?Y0z+j7wlQ=DyM`y){W64%ej9ypN|}dc+S|_d2cmw})qL<* z#ix(w8I!i12VX_)@Yjbq*Y5bo2Z;q92HZ+(%{;andh7YfwP*KGYSjyi=Xkq+_~-Ha z5~H;_kF|4^>PvoWy94gJP|eg(Qn87YnzBx;0(AtV% zAr)|Kjk}^PiO;yc#iI2ox0&%z&1*86?gk#Pe8P^Jc@TfFI^^7*q7;*q?tB1C2LE(B zTSr)lP3gxSh_kKKdU`$E)ntd+D~XaYXpNex7eixaEBI=n4}W!oR4KCY+x&eEJUURJ zrw4ZrOT;qllW^$!#4hmjK2oX=iEO)5*|99o9IJd&Yh_b(eq!WWYrq908FEp5kaPA9 zYJY>`#{8GH1=Xn27IPivnR}H}Fjr@w<5uQv42#m*s%CP1md?F~*IdCFm)d2FnsCUO zR;V0hdjQ`W?&oY+QJ@?9t0#X&jo;0iIn*^byG;Fxm4p(TjamX0zoa;&j>sS{$vX%c z(|;00-~)`KFj-MQ#>cOyA->8%m)DEzg?-cmOw zuCK?Z!EwWutm5{**rC%i!Yk~R z`eN@poeuVjq93d}j{^q$EnGg>W zaBe7?Z4)aAW8IV}VvKf3R=U1YYkR2LEDxJ@y_DHmBBO&Z!k@4kyEaw@PbAh{_1w!I zD4)n(aO=wUmIu_C9*_^;{N-6gFC^MwgGBeO<}tNkaW`)KW^&4dh z74ep$i+sm{{m#AAa4|)~Wp?>&%KM!y1l4OXYp4u9gf+5W}tkh?}+;KjxD;7Ov$1Ly> zxA!5V`;IMxcz%|rbTGUjEix(c{VNyn7~Yk}CHG3&y4sH(3h2Hv#p~T|wOL1F4PC-E z#v0n4w<&+aXkgrO1glAMiMrf$L$$%3HnVyY={h#GqfgNHviQ?022~l;ug*suW1tu3 znKki~D79Ab+cnHa6JIw)9!_x-bat|FnZB2F&iu(5_0-bNPEn&44s?+h{;tl$n%&V5 zq6NA_Set89YZI~8=CduLZ_QxS)^FbX%6LV{mAyILHGI^~JOljelCe}$aKqw#aTFoz z;(}F>Z3DV}c2H;D zMGsuL?!RuMd&88(X(^W+DlHz*o~LGV5*VGk9X9)#5)B+9a2U}q88ba9>{4l{^eAiU zjlP!G#lY6NZAagq(Hb8}$n4s!e6-Ps;_&UPkpx23&3 z+_&?uj4Am+-YyXGY}8`cZrb>L>p9PHm}7aTQC9q| 
zRj5^{3Rt9w1CT0%LLjsZ(lkIE4h9Jji%by%0+BJmsS{u-R8f#2P*D&{A7en$QW$QG)!&;o*IE{kUS?J6UIpQ7;>eBbt*`OC{SN|Y|#TNT$+^z4- z)31;$*rx3GlXFxqun9t4`_A zQ`&C}OCxhqg0G`caPCopj`In2+PHC{JD#sc9VX!V$~9Hy-^fOqW-_VBQCtQ(e0C#xpl# z2o>!b*>}O#j!RXq?InAeF8!kGN2ysXiOe)^knstL#+#}at)F{&9b%1i4W0|CD+T+8 zjG#v?+>5OAdk%Y86Q!vl8CDWIgvGbhHQMj#dlO{C)`zHyr185 z;3|kZ;U9{{Aea|7K>#er;$&Y~84w4EQq_KSnDxvht^fu=Nr|v^Kx7$TNUf0qo95cD zwX4Ifg#I$K1I&5A1L)e|pi1Vs5a_KoM@Gi-UP9ClR7v~!mjIiVz1)_t7E9kyIQGLM z@}y716W^#t!wYLp9HS@hvQ0% z^y%o0;zd!Q&fN1Z6jf`Wz_eyc_Bb12RpwSrz}M-90PsMBbimt(;78#u_jyg}p_>)K zT+%aw?riqy{v8hu3%6t`(=J;saOI%&$BsRfB)OeeaMlmkJ z=*rns6w6J)tdJw&VI%Rc_ZKxB*oAW-$FkUi1-`U0CK1ORP^X?T;gT^QD!k4tq z*Nc2GA6F73rNM7GTSr14W`j-m?v7ZXskuS)wChusfd2rvo~)EwuB8WRmSi6YU323W z_W+K0f)<%ajqocn%tZkV|A~A4wi-=~6C>VUm7ZyA!ik}1Q5Rz)RD_!)!znXY7o$W1 zlG_bs8f~J{KxaeIFT>OSS`2M)PtS;a2>~jv8mb?3>8LBnYliM*@YupsF1({MH@Wn( za13+!ewuL|1sFhBPdIi}nZ3=u<7h{N<1>+q30<@FyKR=FGUb+pT8UWg(26^wa|JukQs!D%uT1 zV`!)fs-$uRQ1Qe0=Y+t@~$ivAS(@&^+pl%7K#bu6({l_w*y$ z5aT!+ZS#q};zm8wr5MVQl z!~sf{_Lu&<>RKFKiwL*jPb}-{3at+$>EmJdC#{7645XeqfNhv>c$sA$I@-x}shO&( z+c-BJlRf<2A03-f6gM;kV?M(*Kj&@Y#~YHfQ(OZ7hQ?OBZI(&eV~jUwc3a`T`?FBI z(_>ci8-!W;HC{KntYCKW;GM8fJ{5IGXxOSSUIe`pR=(%1g+@g9CcwVPWFL(v)1CtZ zZFQ&x2-KHW#GcG@1Rtfc*CQP~~MO#w@} z(Vw~fr1o*(R-OQWm{ze^IZTaVK>Ru%R+R0xUA zpgO{l;)4!KKu>~34beLOlUiEI$dO&c)8EFjS|F9Ol&358JLyBs&v_M`m^B%|Iaxmn z1b0MTr$edx=q-A!oqouR+%!x8kSo6^gWjd>OVIm(K@d5>A(q`yTuyqFXq*+M927!3 zo!2_z_fG@!*_wa$)Ol}mVTJPPBEz@IyBnJx1#RVxY_q&wKT`Cr z9mq3A5gY(7RirQyg0BR&O`p;3K55;%xqux;9-9(FG-A~Q`~)=8ZsgqrXr$c;wi60# z3EpCV<(>Zctg-4n(pGJ1@Q3qRU0;B?YGUCwmngO2nxDb!hWg7Zm-ivue9_?7#;D z4MhV~W(`qqf_3+R*r+MsL_%BoURzCviH;P0%w96m+_Xf8jP@=I0?M;zNxOP)f}q1< zzpUE;k!RQPIcMo@1asN1nKHdnK4J}k%EkTQ2GJ3y4;r)R#yLl$eHOW1 z%m88^%W(kRa3BrSVLbSH8M!Cqd~AIr)a&KNcsT%=X$<#iaIZGpbv~_Tn`JdJrC?l= z_li250|%PLuHbI;%-=M7WlgnnyVF1$4`te%)?XJeGR^G|(v0cu+!(jJYUC17a6B8! 
z+cu)Ny7bnGegIl7DL;*WvK8Zf-v<_WBEd~OcbpCzmqagQ-VUNBzLEYAx<|9%2rBlf z8pi2oN&C;ryDUM|et>Yz%E zKV9qPg!}7_+?!_s<<+|E{Fhc8V7;h2#!0!&S9-+8eHmT>zP~E+w%GVeE|AnmV{Lu! zm;Vq?dsbwBHq0F4k@nT`QuNtvJn?(2F7#_u#i1l2%+3RbdXLcqJdXDh*?QbOx5esY zz^p^ujeAeIS(%?TnQj8X`y+xbD)@|!*GZm#ZUuW()8_8!BKb^{nKN^OeCJ-5!HD!0 zSD<^U!I7=Z+t_|-bt?Kh<-Nh`1sy$~)$w9Ka^vb8^P`Z?|N2wRn!w0-X}S-*2WK8W z1YS_%m{>$FrN`e+q-kOE`cuqb$NLwH12smOU0*YMh`yqPl?aTgooYs6FV@5MRur#K5G zW;)Dx!@`w|jfH~RLq;t~k0(oGvI*0HhogIiXZMCQ9_|e@NeCP~_kmq)p>9k)VAsC) z?7AtpYrEX8*;GG!akp4N8~TQ+myzmE-u@bE&(mt6<-PY-}Lb2pT{wAs365aFr#4Z$(x=|mjJ~^u|%c6K29W7g&oh7-q))N zt8xT{9q=1PWd2R0hHncn^7MIR(Ccn-Sou1AeLSMKZBpY)^Mo=_V5lzrfk}u!X4e0) zYO-Obzc)$AF0+waoLU#6g6&MNl8`h6KOI2bW-|!|Frgww#_r?grI6(OQoM9$CT zYs?*EKA;VclB+eGyW0E_u3Lp=(06z!RieNQKlU4ia##9*6L<1PX?Z4{UA zk$6wsox+(c62&41oz+h>f5_JJY4OIZgZ@uj$z=)~d{?gylh-QlhfZ4Bf>#M?#ge@| zJ2RAVwyE{%obvaKmZfHDYy}*q*Vj%yzsst3`(Rs(Nt;<}_^V6OHg{BN5F>;Al%;hS z`q1{l?YQ=~$zZp(fUyoRpjz8yKaI!JzdmHMq;zR&a05de7;lh3)u{Yx)jub(On#m1 zgDN;qn{;gn+kcza7EoOdxnh~m1os@pVyxD)Lh|5ehKDI)aO)`hBC*aq+3Z3S+uu=X zz=l-cQ1s?!fBq=;`;zjrlVYEVdmGWQAQWJB=PyqZrL0djWq2^h`rJ;=Q0AunvGjYY zy@+eY224tFP7pz0k%#SxBF$(D)KZi!{Y|(+;QUI4b2=N*2J~ zyURM`-3!y@uS`7{=dEDHk)L@guI_4xS*ryX)yLE2Zz&>)u+@)0szxJ^?O*d)u=}ot zDk$G43a2W43f9Ui&u@v=c-5aw`%SS~`l-2fjlG100b^8i@-#6FG$G&)6dzry{cs^9 ze5P}Hiuy%TcL#u^so%8+2PP1_`SNL`NLamavU8z)dMxY!Sl-kW0Fs5i`Tn&!@nbQR zs;&)wkZ}&GEJq%ktcPzK2uCRO*t8?2|JDPaU32u%4e;T0Ib`UtueFEFt{WQ7A4}D}EHhZp=(kC1u2^RS!1vuweXmm@j;akw)pUB#Av2J6yUb z9E2HfzRa)(?bf3$dGxN_TZ}9&K2!13NsZt7VUo4@=&wbi-}9eLgN%Ab)#%@^c(3G> zKbO)&cPsp;_<8KsYQ3FxZL9Ss>#XD~v|Pf;1Grx}7%$nD{1`u?K)q7jawr8cS^4dr z(U;Sc(?hGt1@pg{>-RFGT(a5LP6GUruTdx%8`{ixaSy&s#J9N~4H|Eknhu!t4|T)k ztw4}s+n2z%Njy$;DUbzwFwF_Q*W5hjzid{d$)D{m)nXbj%PGCW>hxikqe1ThoW7Qi zEeT7C{A;hG;k-VcpihA@o`%VFjgJVj-8c+@V7Trh{$h6XOzYfFAs8bwti zioin5Q@GQXFuQ5HOzTaL%FE0*%P-1OeKBj9W7GiX1UIlRG zdGBBdzcBa5W7pWUYyE4if{x@7zCqPm19A+1nR!RMnZFW6Xwl*4T+G<0s_aCjwTT;B z+Ro&*US8N*f~t 
z8l|?~6J6xmKL~3y^m`A?@hmj__(383!z#=68C%y2LpMc1SWo9ngEEKAvRgV%>H!GN_7!zN(V+hmJ zE(4Pt1tZ~E4A#T*g3+fR#^i)y&{|9wTOX^FQK@LUK@~(+fAa7W9E#Tp-+*haVN7@D z!eCLcwTz_5xzpFZ97yTTf~dR9V;D8APRC|?8bd#3{Atr_H6{C%QKa;D0dPp2W%YUN zORcaP7JMJX{$TSxS;^)h;W_8cKivI0K0DTG0UP&wNP{XvVfCMnFzS1N`hxs<|45^A zN;HrHE8Kc23lEqE(p|Xp+Ni~xm;#ehbe0e7?A(R${>Xw=5m4BF4lc59#EgacLBzLw z+mJTQXu}q5u@54W)bS3DZ|*k8DMW!~d3iE=b`qh6LNDG_^K9neZABL3S##dY^&bE} zzr(WsIHJb{k_^h64~}#038;ysN5*RSZmYC>YffIe_!p@u;Yep*j4>8Dk@JpSBt|%6 z&Uo+4nGewEbb{CJ)dapE_`JVVp(i0K{L5qSQsVvdoCM-%PV<&gu~3ov=BaVg_1{DW zs2sp$+Rh?dvENd0MJJcAnezlNzJnw@$okr@V2=IuJ8Y2k5GOF}i9Z$v<{QHDZ4JUK zNtAI!eRKYUFuYtaDqu+9%M>1CH5*SW#4x0=N;Fv3Yv9_g1ysr%g-Y>;j9As1`L-|n zc;pp?2{-&=HPsrOGoiEr!WNQb-S<;s6Fw5QlsnCBA>dlqNswE?oKfY)NTB@ySLb+t zq+AXQ7~FyZn$sMSmzKwt(Lgv!!86 z2G@Z!V4f_uW+$?wim#G+^rXD&s{*6*_k_KkZvPDnb5IkOrzf@mClW;2E652>)M}{S z?rt+{+!Ao7t>*M~F+guMfjKm}2cuaW`4F={4>o7|BTu1;g61~hzdy{-J zR6tWAY4^h3O08NEx%dc9bE-dotEXy#Ev#Bmxc^VhAlg^=ZWY(#*lpN&ctf#l!U=as z;JPt$25I&-1J!NO^MG>FYzKt$=qh6o5KRXT!zktV$t$Ad^{DoyDE0cldD zHgno&&sO)10HVwxPA}*y~$Qt<}Bt}J@ZHO zN7FM3XE2!TpzSNyeu95*yLD017K1q`jQ+PtI*i`{p4@LIebr9I%EZp`nvF5W(b16? 
zYiVX{bnTWgua%8yu@iS3bU!{>Ol!GVfr{ z`M@lx0|^}`Gzj5@`GMpeec0!MNtq|j9z3jv4=v_3QiE{Y~nlQu4NX{ULyDN<8`9OOLUewaN*6eR0-feqkHv`Jje9yt)IRU*Hoc78qCdM`Cx;7HX1JTwuio9s@~aGMk$xu~@sFRl4d+Hq zH9o!46VhK$(CgwlUXlE)Lq|*N00RR<$IoAn2>o(7_~^vf;Ytp8oZV%))g+g+e#y}>$0aFuZ$`hYJx-nG zXIRj2`XEF~isZe7rFOY5%o;Vux8Pi7PG>VLbl-YKnZCmo^{8oOp3K(Ax_L`?av_J( zhO=J#l$kTw;;o-Q@ozu8;^PYclRw7)`;T7n|Go&Ae_sUHIscoBpbX*si+K%35!qQQuR)&rp$)om5MqVw%WM2Ja4v9YWwNpEon&v`IPxW+^^ zy_^p6d>g2i#$`BJrk(VAMcH)q+jfhB&ACN+c_+EJxN`P?6D#75T-g|yDvJpc+EzWs zpB0jPI&?1X<g2NUHuB>ck(QN}J*HbM4ChE) z*=1Pi^`ayFyJ++@<(o{GWof+&Lr+JL_~yt@E2u?m$OCo5qxv)#X2))8>}Ic*w_4OR z@Y}w5eVhrWX0Cd~dgiFR(UD_p-C-O~=q|EV`2j;2{qx+e0D0b`-z~m*Bj%LfchP*~ zpK)^kj>7+|9rW)g{QslN_c>ge|GJ%#*Pm^5SZ4agR<^_+F-J|F-)zC@uaFqCn-#_I zKW^M%Uw<#x!ntC+6mB-gOIyF(#!;oBBoX>6#_L|RW3NcH|Ney1e+hWJ>f%rKY{r=X z8hk`Sx$~UK+(=7D04b4=elhQ{fNAy9D+x`g?D+dbKjY!4XK_0)=4XwFfdMl!Gv>Y4 zL>4lD!nY>4+Lzwj$xZTn0iyOJRfmIC*h`X~R#t}Es?P0_V3u&5_rJ4CQW$?ayOY;x#%0N7Xm>Z4yDvXFe(kKFiq2qNBx7B~DLZ)#1?<_zxbZ zuUy`c0ls_D=_?!4!@vLd-4%d;{`(63f9)cCh6#ej*+#?*P?`%CSoKLBx%B9M9eud- z?4We6NnN~NnM=0QbcxxXU1$7cY1GZ}O0gdNZi`dHy=AU)w+&koWPQSt z*u9~3YYQj6FSkLGiBZ??imIw=V&Wh-C#UadW`(4v!`P!=TZg8JYz@@Fe7lcLA(+!2 zK6VvYt-RkVb&=_tMR&Rm6?W~3^b7YB3X=;tCpNX>|AJ5N{T>Tj8_o;0?UG#ljh$kY zmXVM1Ve=5)DYY^clJ(4o)#WI4w1UbV<4L8UFQ^G!8lYZSdBsLO&i>6;U?TN5V4AqQ zl-XEXR5)Hj#BVPTZY!(PZx(J^6XXOS5e&0U>LLpJAIg&;2Frenw<-rUa*ix;U4)2z zeXKqEik@Cf-FR0)(6&8Ct_u*1drO@g(=~ISYGmpi(_5M7tUC9nVN(UoYq&XC|7SKy z@nXlx80EODbHj~?@9tmLrY z(^Y+}ZwBEe5WO~S&MA6yOf%>A>L50@Xfa1cS+&Uun#W0tP6UIMf53QKwWez}<`~z0 zN0^xFt)&?`>s=x^S6n2C)j$`XK#%fA_I|F4607O^gcs+{pS^ijyb@n%G#t>O<4=$f z>T|QzarubPHt)!BTAgb%D(G>Lmr1?ZoTP9mQjo`D_vWoT{S~7{@d0kxuq2N1@F-|s z<9xSZF+bMP;SQIUZen$LIPd+#hY!cf@Avl>oOQ3Tp__KEW$0kBuG)=FOZ*-s%OaE< z8A4*UpI&{J$E3uD!bZug-x>-+7(hNpG7Os5N@~h<~~{mU@z#dNmd<7=bf6>Q|v&C!?raFSl_T zL;k*U<;nxWZ6Wt=}8Cs!D zsXImb?=Q|}da`7e&dBN5HA-_LnsVdgyN^IOt74JBN3!x<))t?AE5MnN3Lj-XO<*L( zxK2F&b>n;C_upb+=YM%F&VWa^_@qLM7!-xM%X@jWsw${+ndaZa8oJmGM@?6MZ}(x+ 
z)T>=Vu!Y@CoRvx(#?giH@5nVJ!WDn`x2yB##&3N8Hd2T$#$_bsYTk`2h$Y0Ug@%Nn z^^^P8Uiortz=43 z0&To62aRbT1NO5?n7T+N&(!h-iP#P%=B17o^hr(tV=FvaIvX$TxA$-BV$pEJI?n1~ zv3<+mu^Ms8%n)MecSI-#la;dJ(i#Li4?Z@9BAqWNSW%Rdy+v$kIDuTvWaW=-PLeLL z?D^Xz%EE@EYjQ(wV(MwnHcBWh6_13Z!?x$hrPqLPOPy!qHFHg_Wa^b&EnVz+?&}jr zNc5y_YD`wD*eB`cWH;F>0SI@bF+t{MCc$HoTX!6wFLRlzF~L0bov5HuVKsZ8ZMtZq zRba^i(NNpGXdsMp)1u+h+$f{_GVrY`N>bcgc9u&-n&qVNRvew!=VBzdFwrB1Hqol0 z@X%bFeo&7?7f+<`4G5#^s-GS_I4pAkVoJ06)g;B}1JBLc>OBPaMsyeLw&=^Jlyhbws1uVS;ahjnRTNv z{t@I&yWu|%*3{G}Ywa_6`)K#}y~pl3jCbM_ZynPqJbpmX{87Ff+37sD&27NM&XIm<%_lY)L;9SS_=#*GXucnd+|*^G79vC^m*EO*A$-q zthLfmeKdKI9Mr(uMDvkaIT$E9@^ow;&Xvxa6a#sMLyB|ajnanogucxy;@M0n5&eca zJ@|$r)<0NDQWFb1g{cli1BS@W(itkrikevYiLq~L%4e;i(pEp%HIaEW>H;Mb=c>GTh+OG^uEzql`b^84`0M+WYw3LSkKtAtBc*0onT&VR<8 zX-w0A_~aFF{q_BW)>O(+az>N7c1sYXrDSwOW3P5GUXYfS?mXns=f3br&_b&dBU$0G zx+L9K?w(6|*W7@+eaEU{Fk&pVmJcU(xA#$9nd?H}rp;Tf3#dKEHV^KRV!IH?ykED( z0g~)vQTvjUElqN z`{#zZ|J~m(INbk>|LAq##{0_K*03%UVOf0h@Cw*)6;AK?)M=JU5SuIw{vH=#1dnVW z4)wDadR_UBC`5$hkJx`~Vn;jszj((JfqLP?5GzMKZ{DTRw<(KEY0st-jWKCOLAVP|Wc{pM}-g&Bg zp=^?Xw#=_=Utag+XSvH?QeEW*A&0T{qkMb~gY)BE#D8wvlk=E#!m77)><^>=#)iDk z82@(v$s3nHjkIt_Zi+jy|HSo+QBWUX(>}ew<4A;xB)V>P(0yNK1yvE4i^`=>Ev-FcTyQ3f8-_c;SiuJ3ANv z;#UqeKGRKhnkJ-RRk}A*CLYMJ`c|wA@M&H-J>eKRItz@{b?2x6T@oMAG+w`pKMTX4tn!c{lFKbVbWqn7TzM@_3<{XjQhL;Z? 
zD~A9%J>O+zx-KDqeh&ps>C4+Mhj=VC$6kOSOim0|l#`uZ1xq2wFt^ z7w<=?RXtOWr9pi5QYnqP47eUDB!w@>_~j=((}rW!dnD!Q(QVrxF)c-#mEuh4O-YV9 zFcjlz+3h?OolV%Qok;)XeP4b`!rJ@t`|IsnS?n&mEqC<=#?n@X;QfFUm?3G^Q9F1F zx=;M=@=oz@{BmJ{jk(JR6ZFCs|63K|#dmJ~US??qFpHevEuDI~*wNq-_Hg8Ly}mcs zv?0)iS0kBz1fc*R40~;Z=IB;|##qUXqml2b%K9G=UxiQ*Xy*fTQKdz)qyNb0|Nsc-T&u&?K&rXTHyphAz>O= zE7U)}*g6?V4w4d$lv*^fE?u2pG2e2tyr9oR4Cot=1M*?V6t?LDLN+#wCDSyALGntC2K2NL~ljD(n+ zoLtjx$;|%^w0ieUR}2t-^uw1A#DwAjhl7nH0nidGY!mD<-{aUb zw|t|=r62_39WN8K-Ds@6A~_)7i8c8GzbADWV(z(XH4M}Jyz#NzteUZD&x*MwiiTo@ z2Ok-c0CfaFHIsbU4M-z8Z2)q8i`7DFkRMKYVF;FUO-%{f|MsJ;3Nm!BfNmmH?NRav z>P3{@`va8yAOR;6J(kDAAHKXVW-*^|=m9WE0{IYf%z1w-Y9Ne0E8%seQa{EP73I{= zxm&$)za+V6uv#(uc6D;la&%Uc))O#CE3p}<854xkrQJAOHz)ma>1@52=8HO0iKH40 zSTQNehd#4}U$@88e@P$R-zBKpy}3VVd6Fg{WmD6EH8R>Sz8FzB93KOTp1@l&#wK7| z9x5n!7YQF|2PPHzynM*rBfR+j4jC?sA5CXL ztUPj$V_zL_O??td=V}_=wj^H;lblgf2KYI{jWJazLKiT&uPfL7+i=4>-ZB=J%C?T+fnH zWFswzYSDa)t|X{M^-2rnM8ek7)7pvuCE@?A0+j?=$J+?ZQLFQ2EWt;1z_}IfQ7Sz| zT^UNwg>$1%0y2};bLH;vyA)l)SpsY~&YD`jIun9Ke_D5+{e>V_mH;N9KoGEi+=1n2 z(a}eRBRJP$hw+H~m)Y0fJbbEO;nCSjjY`)RoA1a}2x;MxmsPW( zk~5wbw1j2bCv3?kgn`c?_mbPl0F7I71j;f0ef)=@$XcPy8r6hwHbOdwumQ_}Ewt!u zv{*qL&Xr1O@bnn)K03$r&QEKUqE$aoL>`@eOzY`<9#8F@+(e7w=P)UyHiTTzu_iEB z&$VbIXQXOdm7S`GTEN_6Wt!L~S@1EW?o`Fdx+v#AhWF5&5aS z<%^`nIxCOhx}t^;Kz|s|CiCc*i+fON^-YXLe3)%6B1q0c8TtNX+ub%2uk_Dza&llD zs0h-49HpJ7(4UfVCb}C=>J~i9F4*pBCvU~+=4@3e$US0tg(Gs;$9Iw?`mr5Wb#=I_ zUiU!NOipQ<=ZTc=+B|=+tg9d}+M&mztl&u90YYoQ0ae$oD7=JzH6S+DUE))xbv#$+ zMtR2Hpt4-u-*M&*2NBeR@7Y*f!1^*jY5-I-&ULa(2;J7T^KBYgo$`_Lm!eJw!T$7r zs*;>@@=nBI4aF1VEu7YWV#5hS)V_>U6Zyp5J*3bSzT|?v<)ubSl85wMXio~4vzbVh zuey@*QgOJJM*R3=>(jHrhEseK7dh!OaX8#bH7*_=S(WK_C~QaoQgO%?!rl@o?ralr z2uO-)ytp!$YsuK z^u}klxpj`Tq&2IiTpu^Wq&foSR}J|z$L7_YouWtaYRHS@u{x?xgs8f|pxZ`X?psK7 zlwR+__Ag1mnb+|OW$q2bLP&VH4D#i`+Ckh~Af~^dy32BpRN=_jqKl6^IjeqaVxG$# zPpZ#8huPOoWHu}Ok8};mey;Rd8Tu2`jT0r4tjRGhH&2?kAIGbKp;Ey0=g{2A@@12S z(dG(InlRa4zz|CL?<}oxNl?&yJs9N2StI95hF4HdXc`&d{hbE4yb7Q&BNznaI8S?Y 
zS%>ePrg^j;rkZvKWcR&rP0mXVoKK_tGC=hy0RpFto zQO^;}2kR{gC{oUhUH~X0_+0;k=k9{}VB^CpLUA44_CMjtZ$HH9vH0PUq`QkhtAr@N zrOUoeHvv=+?B=7q%}Mq5fJ8H%Jt#8~-jmtj!*)z=Pk-#%eo)3@Wg`|H`|0k_14wAM zEE!i;F4GY)j25*w1Wrpqb`Jcn2bL!*Jfpy~tC@Xh<57UD6STgBp56wEom?XVNb!wEspY_dHtT;o0E_NAYw=lI7S+@20l36jIk)l+FM#4}=1%&XMdHqP5C z(;taHu;Uf?P`M=L-l=56wJ7|U6bZ)zH3ct{0}CO@#{&lB%-~v>%!Nz$9dTt&w_(wo zdQBqu<(`N^wu>!4@4MeL(gO($(P;aGZyAIX3rkhHw4_x?GdB*zwObOCI8$zZd}usP zvdtt^m{c%LQq4T%4tly37bd5_HM*0j1sUC_%c4L*drm9w#%}%P&bD{}ONdAdQ9or( zssiK08D_gb&rE}8fd97e@d|juZS!S)$8AZ`ub|RGr^HvLUUjT4y-QW&R=}*3gz@OI zO7MN$yUa54TzF-8vx-*P14qdN3~5|VTTY#mCU8h`gf6vQl3M+!L`GF*ib`@)qDwG` zQXtf{)kv{5Ha3Pu1B8wtxG{jC+xU1tJA>2t{KR`2Bz9LQ4s(0Tt9TZ&@9cnj&5^Xr zbMc%wr9T)7u@2K%7U0Nn`l}-vyf1f7F<{?aA1z!7XRLai?8%h8Abt?vQZi6nVl+^r zhP@MU4eA1J00+6tq}dMOd}kaK0cwCKCC*E~CSJ6`tR1 z_eF8J*20{vn5=F8mzdiQZ<$STN&kW;68ufS+mohG3Q?)AA1t;vBkmMCSqGQ{bPwV~ z_SzihsrP22cs~~tfy=@U9mcdrBC{EJ+5C`nIMmaxKFEK~n^=< zMw0`d2$^V3_Ldn{dTr{sq*ip|uDz$EKh&w;ft5opdQCjfim9Z$fmp)!@be`WO0b>P zlYWoaF0kY`th$%yeoW)(6;Mv{2EzZdYUaaAxdC{5tcRcYB=L}n`}FrGN4vXq&NM4j zFtY-WBHwu^Nhy}KWTF^US5(*J5Pf0Q%fDfqU72kl0$Pl%L+Y?RU)jmXwAq*2`OYU? 
z2rz`TqxdH2`K=ExZ-Bd}lM5CfH_|P?B`m$_J&so2_B18D| z$vHqT$5>J*@WR*%027T*UJ<3(w)9K~3lF#wlXyVX=*U98D}Wixk077U+b(3i+6h8= z4Z})5&ch~1Hr8Y5V~C~b-;@s&nE&-@PL>rM2kUHj-RU~VHeE+z_}W}&xCE{fMW&??<+B?km#g3&hBU3j z6>`wN^iJeScOPF#T{KzX#nytJBNziI;{w{}n=^QMhkU_U{}#-DkcvJQev7ssd{6(I=K7q7;}M(`$&9U4xK zvXMhN_{V>&EW@e8A1M~FR-n;h8rm6zsq+OD(jerM?lM_kM7QNxDP=Iyy8nSpwW8Q~ zNE0kKC8{$NKRFFgf>xHE#a?KeRE;SJ&O{iwXy$cy!(-@aA#U2rd|$#+Hx?=#KFhMX z)}!3qwY~ZF6+WK3|C^_F8tylS=?*F8WiAwvPl2)NOch0vCa}^$34x+q)TOqHBzmel zRNj%mtL8r;#qg0}&Az4HeaQXhHN&7=@vxyS=q8>v>{pNjM7t{B@DgELk7V!c15!*K z8Iet*zUgvB$Rbr;cTv~|wBvM^H0fm7Ex2TK|V$UQHII)_YxnTV`c}5K`Ha7Lsl{35D zuE6uIh}W}5mgPhjSdmwCAU;}y`=APp1uEYv5kpKdBG66$>GU8v-T5@A7Fl zMBS=EV!!+Yp=IbvWinyo1igh7<0G#bTB*%(nUAZ*J$+Cs&?R=%b zAj=2XdmH3R8Z=LT&H5Nz4RGFUY#xi(1i^_Z3Y8ID z3UHqbRsc0v?Z;QX55*O^RM>M1(jbN!`LH_@5lFnG?`B$tWbW^GUgn@xxK`urdB+SGLtaG<{0NF;0AD)Db4d8kb79nXvt|b4 zQHPPsm+xEq>BWcA%jd^W9|DAWU-N!)?{yv`pen9uL>F48_yMyC!>$5Xy*C*yP7tcy zP*Lh(@}^;rlK5`;x1$?vxi6?q&y@$?pT-4uLUGTL3Z%>HL4Als)lXv`0{9kJs1J?6 zg9D9M)Kly&+RtyQeYY}F*8v!Y?j_X42TI0rItXT$47$sg~L8iX+==i}%PF`R){Q$m81L`xJCONhpVf|xoFm-k7 z`B=%-wVeRMb;u8dRGA=Fe)IM$bJmkL{@lX1cPl3H)%}C#NPrKNwDDWnO2q=DsKek? 
ztGo;96{nt?_03K$KCU&yG%d{oCI!T~5fXwV517U{n=reMWq-V==D9*<0~UA*C1+#C zmAh{Lrx*%=8KtVh=d1fd7@RQ=-L0q>zmgp*&t=MHuIAf03p$Rr@?7xW_n?l_54-vv z*sj>YjpS+U*`;4FY`j5nKcqy?XJpSEJovlY(T8L?F#>Qv<;Cc;nhI?DE1F`Y}J% z#xtROJiiNh)0TxZmuGM2a;vA`LxO=AaU14lypAjxBFB9}F9kl0kRo{HMw3U}Efnws zIe`~DI>Bj)bM>-^Cw^l?)*Az(%iX!wWe|-z2xYd(MbOmeZ4HYBfB5p^=iHj{3(i&Y zA$QxN-G?5aA&Z(~U=Rr91av(a=r+g*a4`zz4)MrBbO26*9>PR2 zXM7wqP;G^y6O>ZRa?0Q$oIz}zk|gCiGc1D+ooJvCxaL9lx&a$r4r9>#CUiauqOhjy z;Ytl2jm}{1$v2^k=3KB7aoU!YMQ66kBnMd91NQtPUm_AFYx+OGU9*%@pjyGi6bm}G zOzsJD~QA8`nCBo%U0FsN8JKyN7Wi}^vkuCBbjxmoH;3oJ}V zfD?g-gTznNgI7=Zz1QZZ2N4Ny4{bbsYTdh~Z2E$Q2)BKpru9&Ut2C2RI0TGOATYap zN0efiJwgABKXYfx$JZPK9!Uee$vVC-;Q*E%_wCtaVDw1D)?_TFmKVEuaYPqlp*4t9 zi56s}dgio6_It!)rqnpWIm~2L%8!({kgunXA$)ZQ!E;j#T@decw!duvod?+5=%$h9 z7G3J#7BWA=qm_4W(qo~7>DW^zfO2cbKYiL#+cap&i6NOae=$r5A5`8&>C?K$zRkEn zz7_7lEEvp8!3@mAb>%ff2)H4FHTm~ordn=5%wu+Xf1Bo5yGqS|G*EM}ZPm%Gl^kUl zysSH0%oYhW2M5cCw1?cyb)ht_@lEA7iwr0Ic#Ds^_U`<83inlVjyT>u2t4eGFTRHE z?FTvj$L{N~v*{x1mf?55pS%0B?`(sVltn13+tdV@@)B~L2WoIvi|wS>d=1R*pJb=I zZTS*;3>SJFgTOI|&u~JZ9*I?J%Q01SEbEma0mopy^f8$TDodF1`@^eCbAFKIO=b(q z-A%fMNP)tRQ-$p*7Cy()WMkal4?YsTdCP9>od|Nah0TGp4G|`Fsx9C|Q(AG`Ct`Em z4tyTa829EO`xP)acUOTK|D zVoBFup@>m9lA1Un~39s_uWRJXQx3X zl&qIn=+4VnjX6OLYoB~y)*KdVxim0UIFHkA+RG2dt#+Wfn$?jP?g2wdKX0nujlH}& z`*tu1Ued;cDNwpLjHfxw)22@dNaxVdkb&}E$lmZ~TG?ZsbTnGC15p;IyyDb`>?; z-n>)zHzc72V!WWbk+W{#;GEEuB1nuDi1@jBd`hrF?UbEYHF}CmGFBUcqy)J&r~;rj zhUJq~3M?laH-}dAz0u4j#b%o*OnZp8s;RdcjL+5#Vry4<%?X`B4e5Cd1tk+M0Bg7x zB9i1b)Mj2mD8v0d@0mX>G-FGd8DArGkK_&1s8*Q2m>$g~2^(4E2YS9!H3-m#O zA*2t6dGUpATeDb3#X!)DWT3ogevoQIkVKS_`i+R?M=crJLM)pw1PT2RQjC&vcdiG{ zwbjaY?}Do8%4Fxz7|u0y)=&SE*0MO6JR4dsX4TN{u^i;tr*8hNe2H?k$i}|&kjs3x zfJ6HeCn?rZ7s|i_xqP|zqW1aOLG9|F7{{XXNv$c(4Ezpbwa9(*HBh;we=%ZK4yh|E zlLo&$<{uZF<#lx<2MCc0DpoBVDkO_e&|s}|aE~p%*yV9w8RI2KWVXGu@?n?fJxHx$ zAf$#gjkuGU`tH^8mK=+5n|TwGC7Gt->Gq2~0SsbDGoNIsm68*AKUHXy#M15)nw~y^MNJ*MO&61wykK` zmY>_-6Bajlk($BdC=AAP@nju)&JnXv 
z$HcWc3Rn=g#ScDMZpF0{d5!vvJJJQ1I{|zIjevf@B$AL51A0|El&_VoH`R;Q3Zu?B-|c_ z!y2t9a3PS%?A-r=Gvg*uZ`Kf7rJ)O&4D{S3?6hW?i+u!FXqc$NKDsv$f7w9numk|M z2l=H#)aOWkeAOtU7J@;YL>Y?x+i|Uzvqx>L$|)wO+(EvA^pK%kL?=uKSh*b@Y0Hd- z7Doj$ptS;!cgI2vJSy^#nWN91m%1&smc)b=b#EZLHUM4Dryf& z%5#A*C-mHgM>=tWtK}__~64<4Y>Wg&eKz|8vS2d z3i?yR^*ty8d`J5LE3DBR_s{M0MMWuCstBA5g~Q+zRqQeq8;Y`xhnAgbP=N*5Pw9t} zVooG)+Z?L|h#%Zh(6%}ZtgOTGF~A3A=8%gJZIV9x)hxDl0d9(}--Oo!2#}9~B-9A5 zgMAV%c5-P#&?V#qu!oGwQ79u~jgeG`=wYx_30*b05{w$!Qr0vt3{|3HsU;Kj&G0)- zy$?}Er>^PS4nj8!S;?2js(~NH7b#C+yt)zJDyQw@>r`9+V+ebMv5wqaXJ9Ac0Ayj| z#L7Iw457qt3AL0jN6aG-pKNM*5Srg7U~&m+za#ianPegBH1FlFj zfuaNmKBVWMeVG9G8*%*KmhzPv4eNr^&HIR*K)y(59#D`~gN~1L4hJdR&k7GA*bnc_ z;(wQ^FNN-K`Xy3&ZarvkNYz_iUI;>w9f~~VVrREv;iq|av*i+}ry-y9)74D?&GOe} z>2QB`)B?74-QZjitZ_2Li6CfF`HpN71ViVSb$Y7m^@GDkh~;C0=D_LKhl4(pN zH9W1+Za-qYGMk`{iVWUAQ$aR}U!aAR!lxs)BhTVM(GULip<$u;ZE+@`B^r@5DwfpN zSfl5e?mT!kL?x83j$0vBf=vpN)OQ3V)4DL}bpTDexR+yhdk*0haH_t$1j=0iva0Dp zL5M(*vQ`=3;|ox^=KK+>p!R-^|NM)2I$f6o*VkBD9Gr;(KuF>9Qh{^g2TCjWbKn|6 z+YeL|ZawFL&<$k{T#U-QsLK26(Ov}%4k=9A(ievHXipDDS^?7s%p>Rko5_tbYyo}1 zX@iw7E{O}urcC#7GvoqK2Ch|#>ZIN-U|Jv54$~}@i>=joCafcl-|w?AmVjIqh^$x8 zaTL`wGB?>L1@tJZX|%oiw=u)um0W9js0qd^!1Di~%dtdw%jNFR!K`)=mZUUf4;ayL z^C3vnRj?dl^X0y@26Tv<)^DBeG+VRRrZ<3DmSva3ryHGzkYeyi!qq{r4+0TlEUybx z-u*P%#JT@17W``vuuBIiDpb{fP7Y?MS1Mr!N|_RfbUiqGoG}T-|>EyW%AAc)w!g>eD|>zWDum-=U4<38qw}?wYZ2>qR!DLYLqk(1H5M zPZI)h&^do{4eFY=2La8XIb5}D`hxIe`kqv#Z_>maL* zm-d^`U1m0G4mmSeVwUNCFpMYdqIQ+Y(AaLV@jSIo+`lMRdV`3?dhT;~o;Q$_lX)~! 
z^@VuLonir4lLMEr;xn%k^Q4jK<#QqL3`&DhX4$-)HXetg=`Kf={ce#y%2@G;K26uGgRg z%p9`)B8b(AluVYp&bvN<4CfJFh%q>a>}-ZS6CLHox$XG5K&ZaSY@P?XCu^^C2TR>_ zv2Fl8>lZl18aW+3;P?RCfKCXSR4WG^2eJ*1S>0k2`Sf%Zbq0o)1RpE45S*>FQgVicQ z3N_1JGI)tBeu?F=T+IhnYf6%m!`qDm(Rn|f^_f( z|HnSh)1N*z^WELW#@bWjsKj|W9b(}F-Sfvx|8?=c}Gd?yCJg_&WX}OMU2I^|2U0!`pF7Ej;kyDUfHL-)6jxJk<#1_EgTjrRMUB!F# zI4*lx)6mNI?$hi)eh@r%@7`~?crV-^f1Ers($IU%=<(*=ECz#b-Sykb>s?~c-T(P* zRQ`fX#B*Oki&j5i*@fr3CTRoCK0BjH83lKTMlS&&V$}co0rrQ*kGzHN|8UEldTX^n zF%th&8EyUgR0HwAxagp!^Gs)j)x8fvdV9JYIuA~i&J~M8P-zAcK86y})Cy6JOp?tqY;|858N%GRp|f%>5$@ zd{ZL&8YfZTHw2!;l-YNP7g7gzB;p`Qpv5Rv#0!^Az1*I?G?JE_B8)^rpkgC`%H)o~ zCpw{)+Ez`|=#xAr;fv#!#S(>e5vNTV2p{T;1waj&_{~IDkE6@1^*9$XyWw9z%({xu z;zw4m93(>0Bt2JK^RTpgV7E&0orHiOWHk7;2#W)U@SI`5DfLS#1Gbcbb9+nkBZHIf z-Px&S^}?*QW_fP0eD~pUm@lySo5JoLOs!X^MAIxHxC){jB~hbsz)5KxKmpcJPb8;| zA?GOaoF{n?*1P|ns>&(5r>`x6$h2M0$lZ~p`h!mlTt7J~3d|#bs7V;a0)f~yMRZ%Z|5Xy_l4R*po zF$;%a1;ICM&~h07`c(fY?#Qw;kd+N+a1|NcWp%lT3uXn)XJvV;Yx21jjWzeBFN6Bx z2isQM@5`>Wfgr-*hv_WAB=MLg(VC*-#`Lo(ZKqJ@jf9gtiG@D6-(CG_(28mlaH{p9 zEEeHckYR>K#{`nnOHHrjOwLI&Y~Fkf&VW6kaL9*#;aCJIMjE^jk`S*Cg3koFu*P-h zj=}gLFZO?Qxqz4TRS?o$+)tzo(L?~_<+-h@Y`8+nV86j~@xJoe6$JGZ8Z)4*ES_U~ z7|J(|YugR>8k4Ggkyh3YtcQv+b565Knrh0!Od`yA0K;;98v!LC=x^{tvkD~j?OW7n zJq9An1zJ~VlF15@`R_%R;!wwDW2l}RNC5oKGyOkh9u!SXbh;ERdJ4WWJjt7W;j($o zuZ7^p7U+e7xU|YNw{EZQN(psobg026=j?vP>Fn*#P4(glFeMs=o&ayuAg5Yf&Lb1uFuH?m;f%9muv z(A%CJTlKuH_gB}kY2;y&lamL_d01ltQJ>UdN@IWlFTjCu1#0vjU}6i1)vye5rB2%| z1<$y5RF4ATb`Pqh#2@3_uuw!J$sKhUx8FzzfSU;3Ft{eq?m;EMiNjo#LsJIOkXIuR zC6r{ZA=)#~vPTrm^Gs-%$+t|~)nC7|w_bFUk4=}Le=~MJcn?&8MBUEGGnSsKh#N1M z+{c}sbyg0;6$Yb5Y+NsQB0US;IW%hn2!?27uNh-7Qv`CvI02$$VFKrk0v5|x_DLB{ zKgb5_Xf;rWQOcMa<$2qAm`>q~HUqmikPF(BaX1;nx>Id$+q^G5KEaOYL+A|Ig}OPu z?WX!I;cLg*(S;m^8;qvO(fJKlW-Y1vjjG)qW^RfTzY66RReNt64B%PW8&|rd&u$Kv z?_%0LP-uBG=u~!Y%CcjD0=E$~bTnrn0jfw&o^i;5zNn2ID46}|ihX;PUayZ#sL7zN z>f@j~1f}Fg+2YFVHcp-l=UFx2>)1f>>kHbWC@hZ0WPL&dT#$xM03|Te>TdZ_xFi9{ zFbYiv8dYi+Yd$(MJyJXNTJ#IJWAXk04j1~jKN*1I2~TG 
z!p>W(`POOVq($xd#4Swb_nVrIC`LV!GOd$q z<1Olc_$_gQ=FfaSz)Bl_NM-xPoo+Vu@)}UQzR0HsL z_>@94=qXhrlNGrWWT#)lXa=y`fC1(=cEYuPo#bqqz*ve4)=C2abQdLIGpqhcODi;8 z`ea3_x??~2YYRXYKms|Imwn&r;bBT>R21?;h zoGTY3{{i`VTMCNeh$Rk`TpE_97UhCrQ>af|u-)KJX}2k&PZm_&z>9;sQ6D8jFVA3^2~>Fq(&h zmS~d%-k9luMw&X0f_xCv1964PhEP!OyL+A+Qc)CPhq=2rH{G8BY!J-B33`ts)@{Z! zMrsLB?T<}k9@CDkz%Vvuu>9#2Zw2cS9Nx`5a;%3a-}cQ06jyKwBld6_Iy93L zvjD{m5W%5|y1^B(*I;zTqm;}%=Yh&rqpgRbwp5@k{2K57UgLi0Q{ z>2v*&p2x9FS%nUdYWRexrY7X2fN?wqVDLDC#>|BLkR@)6H-oWxS`l<G(gs6+Q5 z$Z}!In5!p0RXk&aoR8re@Dy;>3hr0f{#O2Jav&*$!rZWi2groh2v|2q>u_g+*@D z(Mc%m)7B;gAoPt!M&a=aQNsKPB?Hz|d=qb6(FS)g>d4nsj^Rq~*dD04)5mCAAeUOs zOK!|jh;pC*9QW(;`#V9^y{7A|2Xb+35FK<_ z>B?^9BGm73sn{o`JdC$bL8YUy$`Rg>D7ZVh^5REa9^Op(tL6bD)@NdcVS7F znt>%r2j<}+rI37PnPC1aGB*1%t#DJOgi4XC0+Ay2FBncREcvlhyHm9oN}{W(kT#6v^G`dXyD~S zoxm_pMra(URVlnTtWRz#>#vd#va}Wq5#U6&jkQ7qq=xlJh{pOp_(5d4s-kKza&jMROD(tgj>V9$)=~=B z^jIzp%>&`r%DZ|PDzOz~<1J_}B3njw^#Rrd%+tl3KU$zUYe;%0GGc*FDme9h75|>*Uys#QVjsqtz%341HLll~>*r!_l=sNi0mrLEC(-3uXx#=F;vXglq za`J*hBq>q&I^h&@yn;1!ukP)Su#e~w{ebG@>rX(j5VeG{MMpsZw5f7bOq| z>OOr>7Bor`rd}S0j)TQQPzjnP@T)+61Q1zWk9l4yf`y*|RMAIPt{g-)Amt&+J4s=g z_(w%w1eUZ6GeYVfiP&03i(%rU$ZtH})5&wLYTey$Eip|FFL)zJ?6{dVdB8G;DHh%slt3v{fgL5e2M_qawd zM^6W!c?8X(H%nFS*j{xp|4Dr+|@wGx^A2Tg4mEhnN+Wx zS=*Kz?VyFyjlN1JD5cAHh(0jbSq7kHld2Aq457~+4G;p=>iW?ffurH=>C&1%{D<0WrO}%J`~(-rG;ajh+Ia-73>pWbgMv_i#dWg(fvcfpYJzI z&%OXr5qQ^e_{h434{uoYQO6?hy!G7yt53s?*{xb;@YcULgm;(r^f`7ITUEb(Y}-r^ zMxIfg#h5QU$2YSg<}ay?uT>{^#zUgqs_I@Q>Hp#F-NR~3+xPKh42H%Sv_WWwiS|vj zK`LV;l~UShLlMrf_#MY@{qY)aS?gKPbKlo}UFUV4=k@zEkg)Vvq1W#XRCt z)da1urJmo^R(G#ZG1m5)|A);}=X?$vAg${4JjrW6+A4-V0VKbB=-r`e{U`p4_Cg?l z`C}fA0bTf1SFG!lRpQWbjY;UehN#Iyaqpvqyo5iTbs(v)8_oG6v3N_SbfStURbXhY|w;FwIQI4E~W(#n%Vz20*;?cn^TLs{QTti zA`^n2ZOcF-94)%CnlWN$(4Xv|GBQ9Yk|v}g3l?v?QGJR|iJ4cEpu6=$I`bDHkL!7)x1|tL!rnnC0ZSOJa_kIW z=piR}za>J{D}pfp?sVb{>NtLa$i2GBHV;(zeU5DxW?jCVvg6Q*0lv)u5O<p`|t{q;J_$HXc|Vqig{UK$qJplg5*usj?EpG&SuGVV9=Re! 
zJBVzSSKZ;=gM z74%2#ozi`5Kgd08APm%CXo zeR<^h!?lUzn{(3^Y=dtO%YF@IeCvsLeOCti=0YmL0b?cr#d8D#_s8AW> zy5Fj0Pb=c9ZHP)POLwYRB}GgF_oGCDM7G)|7kdq9k;-BB*C1VBMxLZrp(W4+T>zrL z)m$;q@KGCZ{SC(pNt7snR>s1+y)d)hJ|eK`4D6Iquee{ZThaK>B*gdX_JaU_Nc>6d zRed!7Vhdj)_3(ipT>9|Lhd&Al-52Bzyy~ab3@mzVe#_ zYp)L?J5&z9zWymT_UQYEPY}*dG;hmqUnpqzs4G9>$h|<+;S#58cV`i(NHA{LCik(_ zapMWSg#b$>0O10lDbpSs9cg~<)?HrNSL)wsedtHB&nD!I%rsTwwJ%rzlF=S?C-6xn zvuMFS5)0zL0`|}Mosu?v4^_JX3x0Y>?iUS#2}4n%I-|N z+AQt)W;TWlxNVMFD;yh(#zi|?HOq^ZMI6+5I{Eer>d{iu${jfYr?r5^D$_K)ydO%> zLiCaaOatFRI9EV0*U(!BPeJ0f0e0#k*f$GLklcE-)pr>uzL~AIa5hjAW3vw*4?0`| zk|xa(?v$zpte6KgVKq-aCCLg6`|oYldaM84XYC(=J|W;>dtxe3&6dHf4?etiU3p~C zW9deJuFkOB8AbSTjOE(sfkWE#d5qyK(`g(WY91_>$~_1cwT_vkkS9>Q%ZVBJVc z&(JNESE~k| zYPryIfqr&3o!MmP&{zDhQ_k7&w5ct*<59gI&L1xSR!BEqyBzgM*p30K2a&54v}QxF zZHC(1hx#-5?wj04`Xgmk(Iz>P+8Q1**wJ%z=|*=xn4$d=zGQ)X((GcPMtQA}`U=Hp zfh|h?8^f1e&+9E+1!-lqz6+$(v#Fb0z22!!Iqikq@I{r5s}GL6KfDy$wXUr%?#(h+ zG2C%OJ6tTpS4arwl`GASGuMSJT0%f#hb3!by)t$`yp7KzsN_-(V30o z-pi-$v1#`yxJ_=OOb4v-Q)oszHeJR6r*E6{<`31I4_CH;a{fa)$k;_>9f0Gxgy&~N zbRP~@ZUI3OBXB4KhU`8IO}7<?Grhrm)TLMGRY-qR^y9Y9)P^DRrHj@RJ+GNx8j#&62_|9&C?1nU7UI08Gk6##zn zgLxlctFHiD)~(w8{Lsd$hWp?S5*kHMiMnK@qJ^g+(rBOsyYDnVu_tCw9Fz>}(=Ofv z{I`@GTJms-9>nV>N0Cb?FGNM{K)ENkS@)5AA7?zr+XQuA2;+W!Njlum!6Z&$%6U}1 zKZCAa9I(}1cJzk|;E z;H>{emtFk2(Q76#;`Q`X#kGvq6lJYpu8t@fm&;Y)!Z{ zpczS*rHz2yL>rBOyxt5zqsD<@YPyv^pCYjR`?hFL5BBqYjgy;w_Ssj~w$!b*UMO(r z`H>xLWkCD3E_w5?Pr7C;79u$u6fc^3j`Nk>508#&d`_dA0GIA-$O6veXN(Aaesy`; zc{5ouEt*revAb~3XB0XOA42ggh>{D7&O-ZA^VwJ<3~z4Cc!|zztxv2uq;Pugva2D0 zjV8!Jr+ybH*yv{Ja%bNSF`m(iu+c_6*Z%gaPP+#>WhMT) zzN2P}d4R0`0dtSlDjQFq3yU*)r^J_9vaGP*q`7TP-vdP`r5m^Wvpgn20=l4G4Ql*( zR>^sd>Auwrm6Ye`A<=hVQc2C;^}WsQe)<1@iTvLQ9de;kII+Cj%_MQib0CV5hRLtk zAJp$473mYyfX%3Nw%FJ}dyVRKyrO5x+{LE71tq8pttXal#;}lj!mxj2g*4 zn19$I3mrooY8SX`gAsFOVp;FXECr4CbkF=2@Hq`PVg$~>1zV&&5xw(OJipB+4?)m6 z-bAGtBxIdXEvwiz=5fc|@Y_J8u%sjpZvSDFseNs~Z6L{%XXB`8-SAO|{E;2!CpV%4 zuKWdwa(w^loE7c*JuM#j+%Yt>nYQ2U74caozWnBCwb#jv0rO@_Hsz7sIr6ac=++o_ 
zMGOV%Eq6&@e}WJ+u$F_nj6Fc}2T`f;7soAy7twm(iq^ZNjbr5&*koscPhrs4kJhCC zXw_Qmsa|QJA%}_Xp%sf3fPD}6v2DqdiB<^|b-V1@5oL5s%PRjIS*<+szM7l5a#)EK zdV(}VWYQ=ba$!BJ!QS2|U*Z{M=yJ{UklDK%oUZ^OWB3zC%G4fOq7lrYB|CF`aa05cs`(XbMKk1DL;I z8eA`N56;oO@}Yim<4@aM;nZXKV|_s%Nid1I{k75KG_4`0#0VFyD@fHu`Bw|mo(j#KAbP|!Bz7qj)cXU9EV$n(QT z70Uu&+`?o5e7wU#1l)&5wr?6}k(TSPTi8mQLVG1%zodA9q4b*0`k zsG9&LiW;3^oSVpaUv`qjQR8e5+S$2}lpEc+j?l+qrcFOP6ZT6syHeLHr2`0)FRi`ClW!>jLR^n? zD}_-2hDmI3)M?O5#g0EmJ6YSvfte&9QE{y{_Xm-gfn~$vSytXXeR1r{p=!$S^IifEs}~ z;W4qwBAXVkK%|kzjJg2k0Vh033kVaqk+`!pP*dtuI*bn*hdz)6O83-kR!RobAtuE- zc!%FBJUghTp$RDRCiy^dtj74>9^?I&FIJYGwLD0v$ZEz0@%C^g_Tjm)s z;hrM~muQ^I)rkaGtaXJ4;pWy7ao;-PtuktDB%Ab5sIA@j z?O?rM%A}n(PZ6DWzk6{99lU+71lRH}{kN}_o7EP7-(%UH!fADt$I*0YQB`53tfgHj z-OYG#VkL5%a#U?lno((*N?9@;zkhdt3pKEZlyK4fv}0uW#JW=TKJu-7`;iK7M5TTS z04d3nsp|?dG8pB-11z0o6xf7#!+EQy1(*Jm1n8i0Q|EHXhDL0WBG&AAH5Agp`~zfL zNu6!q$Vo9t*N|!w50a+Yu%A>#L29{5`fW*uUA;Wo{Iu2>Hl>Wk28II?)4K+$Br21o zYJooSP9$(;W}lbEjS}MNz)3>)?u|b{6LLuiz84K}t%&OjkA8?0*q8iF;+dvQZnM8N z)SI42goXD056ue}D>ORkWE+xC)6R`vkfGYq@}NF7HN19*7#OMp6{(9r#c%et0{AsU zFvtsY_&Ha^*qD^nJ~^^o$9SYplBJA!_l@q~9Sh}dwsrJ&My~YtRdl!|WgZO@%|jGp z?piiXMmFOLWn`vyHw?_S8`!1=9`AI!0mZ$j=wj;9L58>iEDMtRR7PFMHv`4NOoK_u z0O&?0rw3Ls!Ve1qC%MB(#B&BP*LR#)jvC_Ef&-gmkY7>j(!)$E74ASKmGg($AG;E9 zy~4w}Mh(r6$Q+G&vq>l~Wjwft3kz8i0(DOBkyR>_ik)CP(ugCflbta{BJ5Oz~D^N(h*y$Kb68yASCRclD?)4c%t~t=k0;kUh%ns6_N6XrpwVO@Zk%|01oXd;R+&X&@OT5qtc{4oT&0OB7e%Lg=S zw;u4YgKC)6j$ZRHB|`&%dp@QpO$(1Y=KbOpi?RDHTkiOJS zf}DDjz?AYmn#Ts9-e(b=bh;bS{LnA5tTHXb_Ep$V+*A>_083qP!jfoyY_D^s&y#p#yuok3%jDYBsf3*v)>BVz_0a*h2hj=& zBkF(*;;Aww>O0v>;30HXmL4TPBk)$$>F&{pW2A}0`QBx|RBXejUz=@@W5UDqKFxgq z;CPa5iu9C;TgF_iNM)zTcpAvk`DHEW=B_e#`L=C88+eytMG@JC+&_r-035*|y`CZz z_1}|+GI3GSV`@1-MFC~J{}DFWSNO+6+wy^yfdsQlE+2xv0(a4P(t0FN)%r$2TYBr0 zvf^Rsjx?w=9sMRT+SmlWFa|5=6O~hM2-qLB)--t}2VT=T$*Xd`wFWw7h~EeII=xi;c3YP&EtoIh6P8@2^`3M3)G zItodhOT(m31LnjcaVJI!=|l4_+v~D%M9~r5+e93XGYLu5-Zz1ej@iO`Z!*@-KY&_u33{t8X+dX}q(_{^xa)OQg% 
zmMTzkog+_dqIME|1AMjiBSkqWqe=tBj6mR-F-dzO3TjYpk`jnoOB$MG8d2t8Co-Hm zFu7>PF4IsIvjcWP*;A{f3mrHbJh`tg`u6&1rkjWHeKoNbnTrbM`vh{8)sb39nmuH( z7W8Ti3*rWcW+J4$5_}@Mk@X^s&SRPtMtQ(tWM|-s{W5J4FEL-4Y!q)B8cR|QFgIo+ zIkd$FdcN%`lsVLTqtgwBH>JsF`jWTmd}93hy7Dg{mP0vz8c<9TkryG$fu$fi(PpEg z%rQN2?{07T^P|9Or)W4hr0lZ|O@#arc?c$0Z~M$=u{5i3OBWJVED{>;p4*dWUCv0s zJujwK2D#3_NS7tCkK|4OP?fa2`(Z1QKLT3YZrt)pG%+Q}m!|+M)49|FKo=TId^b`E zU*sr-NROgF?{9lSGdi(dQg~@jtMn@yJjO_X6$sBDxu2#KndakSU4Ub`^9_^~au@OO zoDW3Kbif={pSLtWiI7Svvl)v!O|WQZS&&5%ItxWOpFqD8!2vnmj&H`=MDvB=OQL(R zn35Q_)E*i@PPP}|(2TnNZ9s_jH|6o;H_>PY1cw$F(9I!X5%qs^;xn+b)I!9R*;8JG zm1w-sRvCFuo3ZK6G@;ElyU;`RAq^O$4~0|_@fo9$+JpwYAMjgwVQ&)OLr}EkRq`V8 zNX2!2NB508u>M9DkBL}mv>rntPLgRRXg|?` zcj;U!LfeeSnVlxWJOxJtn?h>3qTEn`S}aQ4hIC1$Tjc==h;MZ=85u%xo4$rsI64;OEf&l2L)XOY{yl&)g+x{?81pP z`SG<*=F_O}l&=v47NDJJ$zBW#6jE!LPrhB%OXHZpZR5lDwkP_vsALiqNpPji0@9#| zNAT`fbjy8VArY6DLa2QH1^C&UeTQrLN&MDx!t1evPvfwXi;eXI8`V?d0a3QmJxRWD@5J3$BbI5{Med4`A3A3{(Mu)?0HiN%yRnxcFQwg&<+l<06Z?)FEC%rSR4<<9YGZQe?C$w>Apzp$M8?5e!wA=M|Y z57Xc3ng;~U7iQ&TgMfdcSqvgh)0L~&eQ*OdBE`gUi(^gzce2hxA&WxYYuYT-La_xd zlzQ(wMVxjNTO1)+hTv`~PP@WbROMt3C(9RlH0lN@?i)kI48Xqm4rmfWBS;=s8O{25 zm+5vTNTgm|r${1SqAjQL9>Ki;oxO!bejnf4KA#K_W7dbmACbt02z$I_c`|-tP(K8G zvhNDNVR0?U_kdPUV#b72PKYE4 z=iktuovwGWQTZTRaAvI7nOz0we8T))=SNG`Z)jp84w?d-02iw6lFbML11JXEwRPTq zlt(B8$lE=ACAdpU!*CA5ba&x2A@}c>UpTmxSYt!x)OzHIxD=2 zormLv_XBM}Q{Xgo%7GyR?)cXq z^GY%hyoNU!N~BRc=VWcRj)sei3mCyMDQ5!}|2X7v3hFCUJR=8Qm?H4ybIKh61!PX| zz-Q5iR# zwe!!;1<9!Uz%QmDF%WOW$uc>LzU{}D>~%mm_smjp#ryKD3H4^W)8G0fS#WM&!i~pLntm0YpU+j-OEB+ zP7@uOCkE~4{l;2%@S2-m1#N*+vC;mzAu>6xQ?6>sjQ1bVEfHv^+pXhB{x~Eh*g8e2 z19l2|=q9AXwH2+Woqtl!Oa|Hoh(Y0j-W2YLCAxi>X_4z6{`90QDWzmsnKalJ{eb;Y zX0-(CH4%4}%u8I=zhX&fH8#HDcng?qLW#&Ky^83&5yK<;YWJh5v<>wMoVJ<(nEKN( zEX=yyv?t=~2(mGM-~g+TXu`_1=c@ULSh%cBpuiB`0^l#Xkc~+?j2arghzwCq&Tb7C zrJYWENSe)&0Fjmk{}CK8#N5(ij5^05Zu;jeqh$zKQDt zn=qwbDa|g^=;d!4j>ye~F!P#5#7b*zGB>2|Bn=Fr(7`fVOqc_TN-Ui|s2L?2PYa`uNjNf%vy{$JWC6v<`xo}LRYHlHB7$vyxB+L5BP^5(#jYj= 
zR#o9f-l8U4MBa$Uj4|3ePMeLLP z<8A2`H~^VhdelgwNwnC0=(=%Qw z!`!I>b-j&A0+K4ylS<}KVGW7WW=^@>hE|t9hCCr~;T2=ibBeYiUhjPA7$aW_%H-)* zib_hr&RD+ZD=+gVm@}Nj3TugwqS+m=X0UrI+j!6w2Q#J_4}&p+_O3|%s#Y>6w#DvZ z+<^H*Z%sRb%##StxHyRl2Na|<%%_>zOlaF}wy^WQNY>t^EgkM?~`xt&Qy3}%rp1k zScYp*%5)J)J5#z4*BdFx7CaUUgmKOXi1*4J1cb$huzbA=6LG*(=Zy&$*yHCAmB0aA z1p?nC6j#I;K~-cFOr<~I0Df9nvU?wij29AYL>>2M$a1(Yx^M_=C+`zRMGPGrbDLhY z)x$}uw=XuVh6>mB=4SHXn+P&%itSXTzC4x>N~hHbIaTe7L~J|i!j$Fun?A*;V>l1f zp8z+#1YL+DihbZr4A$qr$n@PYZ5)GSCMD2w5aJkNZ%c1S%Js+xhN-@Ak8f9n*h%$K zvH`XF^EJp2&%(k8n;FH43g~*07Vw3oASH#HMWmvZ8KdQ`cO9XDmeI#YEo`tdN~HB|eidJ&F8tej8^yCLzZr(MBPG zIH=PNG$!FmqNri-vNdcq0#q-=5T6LJ;8UWQ@Sn8uB6_(Ow#dguBXJ3z}#*&5^-QAR@tMTlOy zF=k|+22YuMZVr-*-4NH~oR!EXz896SM1ZB7EjHE;ND*hM4|pIhjMFwwN-rO4I8_Ib zRsi0s3sFqMdtgmF^aaZAo$sECYb>mU`g09(AYu@V_S zOJo@#4KyGN3k6BPX=fWiG8I*n4$WlG4*uo}OO3TCTNzN2l`!vPnYKIJMhv$3dVPrOljVhJMg)GRVPz;h%DGmaS zT{8}%(pvho`NY@1He-0;Rs#T`0w}KF9^;i}fb1(nED3ld=`aay8<)M8BM}0s7%tG& z4~Z7D!dq(5Q)mX5xw(2hR*qSx+Yewie+-F`I(2@?BpdMYQ-S*l3RGpAWk*sUtfF88Kt>^r4)A&?n6O!d=R5dZ-L_(-; zbqkpyRIeu*Th$Dpx>uq8MOLnCVH*lWAWIvV29cfqxR!?~2Xv&|zPUi&b{-WAy%+ln-=Kd+<0?sC=Uy$y+F5rM$HhV< zXw>@2OYv~@fWnUsu8=wk+hqoS&g?^?-zC;AjE%7dq+?v? 
zv7xbL2+dVXf{6epRfJAInkPscDnOo=mcSMKv8>ieKk5Khs-YPH7@VlVFx-d^80ZBUS0kK(+sjpjnnx@j(2?}VvZfJ6aRSY7`&Xl|4JH-BfSHd;ek~yp53$ z*#Nh6&z=R;McBI^Z4451x&InTBG*{f?KD}pflea1F)20@{-tOtP5D3x8#Ndhkq{*k z+yiy4ZNM9~!ug*9F2@3i>4XaCRjrVFBf${}v)cc>@X++0=Hg=b^;PLVMY+Jh@6>7{ z%YHHjA+!B)hA^F;`|v3{IpFyin(&44by}rqIl?k89a?4yQxq;IX$lc#RI702py&fY z+glHp5=nT(MI`fp7PpzELy#zEt6&#hGH)-)Nz1RXiBXL>vaU`{|9hj?oNw?W);d=r zv@oyGAUsnAc?ikKx0M+gA=ybhV6-4goyZUs`i#3Q zeOk*iqOHzS9@@z;EQq5YI)HyhBWy{e;78~g-SEd~l4K#MlPQ^|`YzIGkt4MHvxC)# z;cdWRLOM^-GbHy3cv+GBowy2!+ajja%R*>cI?g`F$BNDPh-IZjQZHJ@edrzc?){{5 z){bdB!mz10f>lqW=_;kYh7o_LUq8gX6;(;Z!D&WaQQ`Xm@8wl$gCNyI(p{TBFStCV zJQ?(0ue2LjVMQooBjUWtW^xEjvx$nu<6{%TD|RR12EYyOQUs}>z$%;%bE(hWOalQZ zApq_d4G;6xV@`9~IQ$8(vLK^HH1Zh;GQpO*7)ap)NLKuGRoie{69>J;VkjdiWu8TK z)cd#f3ewNWoZx~0=Vo{*Nhp5n}RgWm(q%mU{V+-aIC;!_8 zvLc8UjKVS|Z8a8iGcq%VGBx2ARX#?|XtHG64yGpa1VD-@fJNf>X!#t}m>~$!uZd~~ zgnrj%lqs7s!%T3?H`95eBQeA749@@hcx1j+50MjQd?1Mu%1*puxMJt?*O^Ra1!qs1 zJaq%WhQ5BLlZn1BJMO@F)Us?{I||{kA?-y9jSo(X864bXI2B9Q50y|c`M#1JATy-! z+2qgUet!$5V=RVHt;0+!fAd^ksEIxg@$eFUac~w(Vi5xGc8bBA zBWOqv0laNrOw<2OFD)>-dCiwr!i+|6$(KBE{bVyG&#{Cj5KQm!K3q37r*IDz7so39 zGY)v01T~$%)YiCWua_x#zPqG!xuQ$wJSHb|yszwe9`!h-1Hy%buWT0 zB3z>$W^z7p4f4a=*XCx90Y}=T{wjKD0Jv{BGBs&53C+DANIX~~uWJZ9RTFc_oGjZ| z+{6DD3G(KFJZCfI$tniw!NHHTJ5EwjL)3oRN$WT@fV1bVIrv*RKehEG41C<~yPGxy z*+s4(N6@Wxd!#qfMqd__hM)6o8))zV3|6L4biA-cp1%_W3J(j(bu!`bVq(j@1SYx& z45?QZ3i`LO&3gwopr>!Hiu@GCxaP9@*Ez85piHAMDuFhBG0_^x))g)L%dh2QPLys2 zbrUALpULW=a9J$!U0t)~0Q)djZ{OhJy|trVcT2m=DrR;92DeQsSw_PRipf2Xt8b{40ZuX-`e)Ob?txlbZ!KH9QjV_n&eDHi-+4D`4 z#lX7Okr92nOI@^kl207~ZGMPxT-j`77GrKp_(4BujMB+}ceznUbc)#Af1yHx^n9Si zL)!t^9VZ9FG6`bA*%;$4h@yZ(ze=O0E>Tyw{Z=TxJ>Qw}V?F8r6oT1%;rp(iB6LNI z=NW-BWIqo`^-VUs{+hRVe$Wf&&jB>qx%z8X{u$g~H++@^0HYZ2!?6mS##aFuF2H50 z=~#i@U!||!S67~jQW|U5lKlZ`pI97Z*ww9nNytR>epR>^Z30fwO>_4iRi@pANC(S6 zEZOdn2NTbL}IZs67Sfna6 zg6dV<6H0GT&;l}f5It*;;N)83RmEDi@s%UGPv@dZS7it81v*ZHZQw-eB$CeE;6Kk5fjri)ze z7nnw)wCnY4hV^L+O%@=A(6h7Kslkz-JszRx-J{`2k!^4m)*szsI&lMMAS(eFa? 
z^UXJ>9rJ}v*9MdfdRx~5do*mf-aRmO2x|tmoAWKZPweUT-2=&^MvD?lXG5BkH}!G~ zh%5wh2)dSpD^u6f)1i|Zv*a+1{e(lHk{a}A!D{Q-82>VYb#Xq;A#0!9sG69u9N`6e7Ofi1BS3>g9u%S-siICr%N$Cf|fgxKjuTX8=EXk?qYbz4@z z{J!VM40?7{Ptn{~a;(<tW?hzQIRBMzAQl0ikn9~dqhJUo0=0N! zq`fX#D{-G3tk!BK;yMgVWa%kSkURb4pNc4hoDj^il!Q$@IP#UXb%Wbe`J{ev4Gh_1 z2a<=4$tI|TP=~Ze^bxqt>F`csIqH&GA~*F2!qKTkE>C{Meo`F-u&P9bllwD(hK0%% z)|P)!>ye^5`UC)8;?sBs8*rJi*W1|S+%)c+bGxb*&wyHp)fs=0M3YiG7#Rpdw(xHU z1(X0d4sH)Z!{U>egc*#L(E>s(8XxWsQ3M zu33BbIiEf{9P7U0+L8XD7uCj`BkEei?S1EDUtTi0Rl_f8d?4Mr{A7R{8(_;#1O00Q zeF|ao`noh9{ubhV$>r8Y{6chqtKyZ~ghewWu3rd@zJ8H^klVg3-v>-uI48484GjzW zaBQLTY8&(v8QN2k#X}H)^4^4!r!MXlm3bOI#Q>UnzdB(dv5)X@8a1q&{JhewF+DGw z=pRc$E_CZ|h9~_t6PsA~3nRIpHv(}hi=Z{K^Z4Vw z?-9EwrCG$88Z}W?6nKNWQELzzyaj`9x%I@Gp*)W^K=f><&FyZZ3UFT4nM4 zTCT^-)fute(;Kf|7j$ly(=G`dwtfGX(VtcqGA$*#S(j~kqGL*trOP|$j&TPK$}asrdD=!G)EXhN`Ufd7R|skcv^Bat_Z+FoU3CpX zJE55b>n{q0g_G*zO44b2;-3*IkC>=xi=6dA6-#P>op>v!XcP_$Fo(T4I&32979iu6 z?5IoL`9^5n5Ld2DC4hiWA$nMZFpRF@n9A*Yd>TPB>+xikQCwQzp9Bz*f$AF$s<1gk zdH!GSFm;S-_HkqDJ@<7Z;;8%XWZPehyN4Yew`95&6P>FdFo&9<)Z(CZAk5uf_eWM8 zGf^7r1;)XZeD#B!z!YtVZ@7m&{+*xpNQH13vYX zy0~!8Q0%`5^l*Pm?*a-uqf%Ka|K2#5kl}nIE1H2uEg2?q5lIyVK!m^$rA(Znd%kJN z8wwH9qs%vSa0&96k@#()5|ZyymFQt&tIkQKxi>WS8=0=9_3@eV_mjJa4F|Hvh>a;M zER2vB`k#MK2R{$=5axMf3Whd>0j|_RhOy;otmoE<)AB&~p6TUeEzeygTEFmYbQ`v1 zxx(gbJO#bGl_7R(g6zb?>>fXm?!)yI5&xhv&uyQ^PSFZV56JZdB0zdop1Dk20Ad!V<7%yavztGf44Jk9#6XOK&Lgy;VHaT-KCBW*9B;6MYNhkDUM|U%Kd13(6|F7lG;Wybo?_uRP>5; zp~0EcRV7Cr{&PYLP%Ada-1~0p^U3SHwizBG>rz!h&0nf8n#uP;XC7#Vn%?C6MZW%lhV-~_}A zTgQQV5=+L-C?bOnic~}xME9O-TS+iAMWwFjFk&yUh%LxZo8UYQs=!gq z_+=7SWG=7*UGWxnAvdt_=PiNBlG`YI1Zyhg>>ga9NnO32Z>ea z3XGd+dX-)VEF^Q_)%_u4V~aqyhI*T{4ixH`A+=Z`3`kabDe1B^TByEj7h^fU7Z;C1 zL%0Z}DI{UQuFe^jna3~6{7_fyqG&sR(alB)l{L}}kayGUO8@L1sD z#?iV=(|5!CIb-pQ#UEb_Vwz;LzJ;ORgjO|xt{%SH&n7iH;uOYX*G0Z92csy5I?DCutLfKTPC zo4$cc`G8_3_c~p`y)qggLK$GP=SulpVenVen@fbLwJ$?T5LR`~@ zJ@L2tny;U@E#}Z*!nS7S{k~3}{ zGJEaHLr>*)??hX{YlaOeeX)UFbuc_nJW@uJgkSRTNA6KXwy6x9jWM!VYBc@?hse)p 
z=7)_t_2LmC5u*;lJUB0SNL62?^o=@GL{%m$4crK1_!rg-zHmS{%VrAn9lS@BS3rz2 zQH!cfG$D0NykW{LVL}@~vG?WZF zw=VFC;9z+V?hxslD2IQD1rp5ff(!=pFB}%q;4b3eQ&>tltRmG_Viupdw}#NQGPQxI z!y#fS4P9wEFMq(#Kfgg&(K}=YXcDf9*Uq}bs5_d`5l90>e!Rq>15_&( zL@fz2rK173bVlRWA?El}nFwfa9-X(6Z-EJ&0yIkDNR8f#H#<=OJHcDi06 zQ+C@rbf+i4@luFpnQI6(HTN8Grf)+9KfPLh%{=RYTL6-6Cc3O>migiYDPhw-5hvXR`Wz5NkaTP} zNHuiWuD*U^{EqWpk*$quNnqZb#7J6=eC@h z7#Yc=O@-1_Bq)6G_;M7V3^kpBbU=LA`%vkVSiBDUa9`>8)!FG z31hz8Z)KT>5xJuoGOClLrp4h&x!*ct6kA3nN85@8oAGrVynUwdC+rcV)U>f701^&NKhtmvW^lkH)(j#qP{Kmp?$=oI8ax!D`s#6ks*no6r|0;zSrwH( z=AuaHYola_dl2b|=GAVJF{mb);S)3U7KSx3^FWH^W3k`~VuU9@rwK zCjh#y1ZEOMjis>728-#_I7Q zvXaCo9j^y&_fD`vyjBdBEFX)$rTBZglk#w}r`CX~q4A!u|2&y9ef`Dtzh2BVG0`>A zP+C0eie!LcbbIw5IvO5Z?bb@xXbG#{kTub;OZ|HG`sve;Z8kLiDeZ*B%CSf{_UNex z>gQyF9`t(Tn+(Fy}>Y8)ep;@cnD5u5L@#4>8Kxze{3Aj;u5DNEAa~j1Qs_YXH%Wj}E`)S)N0` zevENC2C#9KHsJW7=V&mSSgd%bLSp(*N7|VqY0h3tQmH9ZrlbP>inH+`RuG+83@H(r z!n$&*pX2rhNeUE9V-0Tt9)%EC>@R$#+&jY)JW~dW=Cp-IGrLv?CbFrs`K)I?@#U-zcHK;e1457zH-{_D4~(pY7a4k zLRRuEP5t8EmP5HBY@F%yeS+!*5^?eE8m2i1ZRj*0W*%+n@K$8kZG{v?NK($f4yCcq z1v(@QBEG?SdgY4yuqbE1n(hS4Vg&dki=gjR58@KtjXbQ< z%GU>CX&9uobOa%lQX)3Y8kX0USTvIAWFD8>&ELQfUQ#a6ZTjpbwi)4qM*Sp;zeOlN z$tP~O#~H2GbW@hs+qb|wPE(2NF_0b|tL`D4&pqHWjJ$|!7t+>5V<}tz_y_I_k(Ink zQ*l#C>Cmq69uo)*SS*9;&|&fHAMQ3aH9cZJwR6?6k6LHu8ob`d;W@*aV<-!4y=HK= zHLJPWpo|WcZe!0OpaT2G)|?8$ZV;6n;WR<&uj-5p(jSpqE@AHsGaE5er{{ogg8(02 z?DYZ&UCv&)607~}^yNSR5e97eJ>V;K?a7Z$j74E&MBd4REb&Ns_=QU$uJONFvS0z#n4y+eKcI z`lu{Sn&@?2TciaV(qrjK8W?Cwng14WeYiNPrNO29kpgmJx%z?EdFIcHgDpEAyCiq+ zcDBFe_I$W3=Z1q}Z+3s@U(4w`)L$y|V7`MXdnCktSdO{i`iP0}G;#ScrN${ce|g{C zsKU4ypsQLfn-9_=6%J8YKyRf5-u2}+j!3clE_9P)9l|33=|Z#(7EwXv-tE??sYe&w zjeaBKvIhf?9`AyX-ZMyGzsFK1#40NDY^_Jv%s>X0NqC zwo=(Fz35u+^D&#uhI}5@-s9uDhS|`}6)O7w^LpO^wW9ddgAHiyTklY&DzJl>BqIFp zC$mzP5OW^%;vG>9N&EY>hy|9ugAAu%Lvd7`*op$}gV2vt_jQhC!@F=CXouEi>FL<&%h ztonQ$2kS$ONPxB|28QnAKZ35z$=;nn!zmZx6nTVZl8A`NrX8mw2Z{XlS>R&+hHZVL z29br*yXLeqDPNM4YV{Fmi4n0!G;){u`1)4!vUU!WFA_PK@HD@{-o!%0h^it~ndfW3 
zFDtjd*$PJ-A53!-Ax32}fy{Aud#xKSUkyS-Epg);WyFvm9zk0a7wsB(z>Ow?m*dCa zHE~HbhKNy3bW9Mxx~EXuLq@A_xWjyG0ZeS%Xp!@$ea*U(v3=D`0_reia}R%Mx+KZ7 z$WVvQ>IA*wmS$|)AjEbl%v=k~nWJ>%J#(;Tws`oCKld21{jI_ipUM28(|nYy`*3s+ zpXggSpW`T!{ND1c2#!9I-_Q&g5&_7TI6ym9_ZIr_5bp){<;arfQi(z<)|ZM%oa$cf zms3)HmbZO!CT9);lUF-UCQlSg#jN|ope*Py$3@ESe}v4vc@Xkh`PQ`Yd*p!VQYD)o z4R$4l0p_ZrjwWA{O*q7^ydmm%p|1ddqlUA~I7(1@GE<;{1g1@qp$`WS?YVl*GSWp- zIuR@Tr;=9|+1!<5?mYE_w9FBtP1!G^@YED17W*Gx z?l`Y?8S#M_`( zvYD`rB6LB{D7lytwy%#fp#E2Unmf;n$t-V{uX})`|Pn5usx#(!D4mJ`^P;m zO+_0L!#ol~UX4sRVqez50vB?yZetmgd% z*%c-UM^~=@9bYZL6nBA1i-iyT{t}6wI`+m^gC&PHrz9kZI5Xe3KKdU^P2(VlE5#Jh zDND^X8hK1D9sep8NG1lcx$jo3u_N&oYrQ#<~8 z`s&KnjYaQXF15gtAlNxxRLArJNEF0;w+u29!_qKEvdBZPkDK0WA{?+^7b^Nwbn-(4sT3O zy1p&nBTp&W9Ts!jbAAYwh@8E{ZRX~nG}D_dS`W@Ei{^AW?tgTx_6Myp`__&vY7zdo zF52{E+H7^{v(`Q})h~Qw$AD7RtLliAxj%nbm+#iDvHd`aFoElj474Z)!wX?)qAz?u zT_sF2C7gq6NR3FMHNY?N(_5TTq8C&3gj7#+ZmIZ9k>YLnA8HQ2?r)5n-g3kJP3i0E z_H!6Dw6PPi4I2$t@`s=(LBg==|1jQqRJWR$8hT;#NX=+t^M&~-i))DlMDAof+vCkNj?0IU5W#3%QHi4?mMm|A028AM84;uTMrCx#X)?(LXx=02qWwILwN2z zSoCW{0|Yrjl*G2XBsI82OV;1m{A`)8^5LV+qpo{Pf8+RA%Is>IYpitmufKk)%2GEB ztG!wsSCF)NKzq@ec$>PJ%C9wh+RLi9ZnNOt zYu^2C*KwS?a%%SFrwG_`7G}CY$bV32+1fz)9Q)Yu!L7FES&q$*CRz3eMRR}h;5G|c zJ9jM{G){lsV8mys^v;HF!!LK@c4t0@^Ii@O9i>@piQ+#Z?kDIUbGtvgK`(cD*ChGn zS<|j8zoW#N-c`3@|E$%=+{FA}MENYuif{U+D-pHI$G=)21rYOn5f!ere@pofS@B2k z21?Vr5~FZ{)RzGy2KQ=6EmLH%DF|4_mTit(l?WK3!sgK=eRjh>_y%*zsIg&qBt^9= zCy;EaP~V|_7SatZO2t?AsF)sp7~sJR#KE~;;}w5!I}(F-7l}Jc2v;wvvD}thyn7Ka z<~Lkd>A_Et_`(|2PR(O(F-_CKEcyZNxb33m`?r+*J>8a~td8HS^={fq{f}rylzWS~ zd8*xRMX(mqe_nfqjlPK`kHI9{E!l;Z-2m%I?-U+9fNZruAb+Tk5e!lTr#j6IS(o(N$A@@PYL)mnE$+GMhN?vcv zy8*E5^+919dxeY$&{XA5oXkpCO!7JBc6ayr42%vM>bO_Y21+#9A*5hZP(mUoKHCYrSS&)I>h!bs*L9>n;7|kGVZ=sO<*EWCzw=$Dh z$fo)IhZof&P~hEf=i3)r@W&FBl|rsf=0O%+4>*awWWU;shS08djWX{6k_@Hp ze|eXNhc<&5=?+d+!@g=jW70f+^}XB^fT7mXtc*vh2RrISzngzxUHjgf8)wQZhjqla z^?vZHY?-ALrqbKf);la1U3T{yrv;B47WxTx2G?}RCq0945ywHucwp2gYfQRXZ`)nr 
zeqiH5e7H0jz&WdJTIgz1nHS*1Gy*^&6z`m}jHDA&^Rbts@_a#@S&04TP zYk_Cx?$~7Qh&k!450HAh`2RjKOTD_+?cKxSv_Xjv1JBWZe;pZTsW22HU3gV`i{_3~ zJ+lG=?wpvZy@>Tlg4ENbw{o7%z+QvfOXOa$*4bd-l*kSLW^~60m`j{o+D_ms7n0VB z_J-A6=~lP0Q^iV;yr>pCdF+^0vF$Z5Q=JJX#YA-jPH7HZ=NT+SS$I;9bE3NHD zN0B`eeR|CEY$= zCC3&l-|_PgCO5>VXZ_IB5G3V$qU*w$oRb)9w2b_!9@*{aa@gPH{NS;F-z&3x8t(|^ zhR_%%dTc(KP3OcH0OeX9DgTpw-lbRbezO}a{mUjR5zmBq_a^_?=?!f7D zp=H3v$+riTjT1I=NXlJ6dG@2XhfR69&Zl!@D|s9GIxh$Ao_E5d>s{r-hL!z(wH5=B zcQKjEd11!b9UBvmS$>{B`1tx@c?ULXq^M<{vY)XUE(+6`RNiTFpBXSy&O5RA>%l}X#PSCI39w^U6AE9GN;C+D#1 z89}eb&lNr{lOkn#$(;U%mCh2zvMSo0V?8MXca#jGUqso>Z3x02*RZagTV{4~G7co) zQ9)?i>iaeFm^F8lOqXXZS7H8`#_|dm5eTgR^Imbl+$gefr-RhKxJ;lyFW$buJZh0~ z*NZ5*eTKz?4*5|vZru+R7d(<4lQV+JQMvzMsD#gj@Gjn5XX)&s&h*1AH$2sJ9d!@B z&D)n@Z_IA){y*fs2UOH&w>OF!C5lR{Xb`XjRH{f5kYYuoi1dz%i1c2E7L1Lm(v&W} zBOnZYM3E5?klq=jN|!bR3Yt{zTLEuP3DnUfOz)_gj6i_P4S}*GA0#pP^;- z@Ztj{D$Fyq6kexrbo|1bM6w#}*)l$V|4V&>Ufzzbv7}=ElhrLnV@KNY9Xb6T#iS6s zvHH9L8_^rsce4mdyhjpecg>}VCbn5iurU;iftG2#@zeWD(Um77^mx1gKn z2p^y#YY%pmjjfm~ZTItJCFqaM7qOmQs>jW=y3``gX8l8^0J+>ZSFl0SEWkp7jj*!b z&q4V`{dJFDlh0spIG!`6X-nyvsutPoc5|aL14jt=BMpnQS_WA9mb5ET-=?QD3fGbU zb&4D^>S2c#6Z%8hQVJ7#CXb18e}V&&0`;$*I=>Wm{;pzg(&m^r_kuc8KtH#Sm2jT; zt;Kw1A#r)_xBdm_PhuNwN@tj76im+6I`_L$1NuGmh>kHUc;X843|nw|gPy!u2Bd({ zH?OtjH>9$zet(&@Ux5Eg?gQi@O*shpS)*wy_18*WUvQBes6UejSLyd3{{DqZ6B;(F z!~jYHh+>l4wr$(c^f8$uYw+6`PhjACJT*bR=@6OYft3 zRe%ebZ;`k*6Jh@Wn_quJCkPvfU{oZBAcQ~*_iyL-*bk^A^{10OQO-`vZq$(?uiTw9 z(axhVkxCIcn-WJG48@IF$Vxq-V4(cbalQefx3}Lg%vpr*^R1;Y@kUuw*YgOKML@n3;ckE>wkWGf4$o~nb(!+1RX9QVCZ8}emrWu`pA6^}S@1{!?H$lvxZdN?r(NLR&!c+?YEL)<*PJQPR_K@(=*=Ru4yNF1v&C1gKfXY8 zAha77!Gb_3gfJ{lDnNb@hzP)I+53jSW(p?C0RDnikP))4OYXQ0oL4GZ1Q@s+3@tb{ zUw=c(9{zYt*t>tHWq!^MzunvT9H6?jv)r_Ik!UirnSpxiDzp9LH7Hwpe{3^cr2>}_ zvmY+T-t+J*Fv3&*zaN&eFjEa_u8OC-Rr&T{eQxU;{*y&^CavY+BLOjE%(PP7&N~`j zkKZX>v3)(##t|xL5yoa9A|l%m9pa)g;8vmKpg+kvN2 z72qxKtnd+Xxq^m4QA$Gx=1NE;MiuBP=%Ud^7sNs%=Lcw#kmv0}$B3Ty!vu<;+=xa%Vg*`8Jh)~=u?4oGsTnYKNFCW52Uh&*+(;1%QF`Go*aD^M 
zmhS0OmUbk2Q(X<=l9FSRT=YG_do%<3rZi^z4h`PX3ay+%cJefR={_uFGDO35Bt8U% zvFC&~01UqmL}=G^Zf90$SfM}TP)+;bS7KoffH|@mYTDo;c!+U_3XQD;cct^-iNq!i_aj}FOifOxUX&xcbS{N6?M22G8xa1mg2i|s;^%8Qn+>f zku0U^%gL$Tx^5|_bX~?L(kAPRTwPWMdNkz9pPN+Rm+UTSxHfFw6r)lUksbQT=2{^q zbD_ie(WA&xB_FKf6xE!#amzFLK&n#$u+Qj1UxnyZzu#e8Dtp4G(m(iYNYPZI_!jzE z?UfI0QR2*FEy2YM4)BaZ;vr#o?97DB8p5A1ykXSnj#mpiu+$vt;PSFK;To6EOR|fY z9jmq?J&mMYkSJ-$eP!5PQ4Ps|eShCn?JlQeP+3B1&(^7K3OaG((Q}d2tns|Fsu`NQ z>Tv4TV(n$LJH0OZig)ss2@#x+4V8+DYUbTNNS^4FkgJ}0oXFxE6Vum*@$acG)mhva zv@J$H++|=hg2x)-$`>Bu@xAZ1z^ZPXfD9L`0BeF-H?a9SP0he!DFDR$ftq;arsabI zadw_aWwa>fMeLunN#GU1TyhU1#}vl}je*+tZFa3ADnE zL$WpvyULpX5IUw@*p_bCKv50#G^#u0)>^C|5VCLI!t^LQ8^E*npGZyHJuxXz4KxSz zMvh8b(ksBfxtw16{;xGG{1o$#MeA=4zVegKA8Q7C498)xGn(R8Tqb6BjCp}1m&{Sr zY@+(Usa3CLRGd8L{a}Z9r)N2zAwxwBX+Hvl^!>YuR~u)-mS<+KYb%58=)6ya78Wop zRLyD6j367Hm0bsi*~Ifh1XbB#L{oY0U;IS6%4XxvR1JdZ(! zPKCR*CReFXshszJ4mKOEjnD_;e6n}jl*b0%$+E}Ea)mh^{=F7`MJUZ z{Fo2$$hmQ+KHP!hb7rwJRt>_W?0jOZ99|a{&#+A_@+DrpcUdmhj?8Q{Gg9+H=fJ~p z0^H2p1q08JFZyUo0&HCbI#L5Ef8?_*P`a;0r zT7&l;CONnei>As~m-{~QzdY;Q!g6S=)#8c{io^Z|U>}Q7nkox01cc#)+U@yQjkR9a zg?d7pRoeaCzozF;IN&~xBDX^DZF|ui*aD>xDE6I4k7BqGTAYw^=LOJ(QT`m_i18;2 zTMu7G2?mJB$|msc8LLbm;-n+nnKkeXIQ*5PKQylYJf6r?xFDzapX>nz_ua9}MW={a zM)m2#k;(d%F^g8WR(C?p4>MKdtUQ|J1zSb{!qm1-b%7Uz`!YsDi*mC(HreaUV9bSE ziNU^#YDFc>$~TJ_+B!A5V;#r~%atWLp^tc5ef;Ogyt0?#=(b!;wHGxI*fkt!t8vzL z>W`*Nh>~Ch`h@t58D#5X1-WPmfJZhyDED;;JhUl&LidHjfdCdRCvFEqqU(0%p@QY7 zSrV>iThh6lcqZW)&0V;KYDPQEJK}_Sr+*j9E|kd>E@}vX7NiT-4jiP@d^d9^?N#bU5jlzn7K!K?dJnom{smM|Lg# zj0VLz;Lvqwm%n)7$QcclXPBuRgQDQ%&fE30nNsJ@{cV)8(2u`XJ=$!fbgL!&V0~e8 zE$^$Tk5Cf;dc9$s zOi1=d?!muBK6B>{Icb0sTkte1lLnrJ$=fDuav$`lJ z9wM2nryROqr9vT`Nbql8%(!ry!IczqfFd&-q&Vxk7{>#$fu0Bv53hL`c3p&Np^{pk zi5^*VdE}Zq)O1JlrC?1B#D9~pj2`V=)hKbc8>Oyt?0P>;O5k4==ms|Kfl*^-1^7u4 zAHS>uHNuBH?Y70d^B5pU7JPxF{E+pBBhz1E!#2{s%yTwxuu1j|t*H}Hc!AVVDRM{` z1LO53FaWc?qVl*as|e=8uw%m`dIm;L)R&IF%+Ag2=jjOp=~n`*IGzY`>OVtsJ%Kc0 z3yo1Ldr@wcwq@C7qfkBWwqa-!{DFM{96SVfL&y0Z@mZi70>0c%z_m?_5EJDm06-U#0A 
zFjO;orUz45eW0Bn0SwBa1vV9s`KbOJ$lVRN1kl5vOCL$Ts54avr)k)mk_h2R6&#Ih zo$x-!uzG|!r5y^3YexvVTLab+dIV-wrIf?yr9G+Hw_;g52F0w`y|sE zfQ3`Uv!laN0`rkJ>x|sh zYKpt~lKLe^iH;VRU%dNjlieS!I(>aO-B%nE103Y;O_K2T%9ena1Tc!>qUTm>B?g3{ zfFIjlnxq6jkXvk5muo5UdF0vxF0s>ntEDb-J@|pVV;EO) z%*SE-i73)a7x7`ETO?d0wV8x)xPZ)X@+JD+59YJ(bcfE}r7@1}TIn7_EJ`FU0hUqC z6Q1V^>tGzrihn%bzj9+gkq0URs1rG)JFLls>jpUJubd_`0bQt(ojx@Q1pzCPIn1HPR#%RG#>oSNH>x7->9ITAS570!mQxI$5aRgl* zk_LZ6*d%>PB~z|EKxNvEh-JVBjW;qvP2k(RNZFtcFk5H~w@{bm1sJ6qM+hg|A|m;b zonhi3f$o1GMS>;^`MbHAVF!^BG62V8?IaTbc@W2mY+7)Gg6!AOLSax%3Fq3hoq7P- z|9&+X7qiL)m&3Ai3s4^okGdAbxwb;B44@+iP@e=h7n0MUeh@9Kfo!=llVgE&B#k0_ zGsLVn0xiPz>&WmPxemZ+O15zs`5dDeFmnHcQU%t%uJc5sY(`WTs^92IIK>Lu+Tjye z^T`0u-2mpLa^1t^Z9j!n;WSCIKLTwuNz-VhMt0W7b{#GM{9If>uYHQxBJ4OMfJU>u z1*Do|TcmuZcYg$>CJ)N4;QVsO#Cn4eG&f#B9BK*D^I7RnKngaBum(isc%%m;J>Y;r z$4`w+1G6wHc_UaAm}ykl0!+!L!KNDSW9kYIMQd#-^c&4(eeb<uLvF42WH~{6$LQYU46;Y2wTuf=x#zVDtzB&_t1XN*^q1k+us+ zaA0qPLY&fitgTwPkOGlz`%>H#1r3srd2%1r=QZNu<5z%C>}Q@{Se}ty2(jC_m5837 zD~+HxXmfH*4=Si@YagOx;FkT5J{gTP=ohBhQwdP!@$i45D=&79W86` zG!|fnp?E8^2g@#de+rzHz9|WlYahfEp!=;O7}k|?(2(pWK;WuA;_GS755D!z8$`bK zuw&r?itPuk;+4kbU>?25Za2j@b%_6kQdN+k395^bg!sXgD8gc`LmzS#r-G6%Z!wkN zKl6@=u6n7J@(BZW-murB<0#?$JgV73*$HmzEa@AVAIRx&Ar5|m>XdW)P{78U)-Q#D z>)?gRuaL(Pm=K=}E}X~^d0`67!_Po~G%PUw=iS6l_6{QtB6NVQAX(r&bPDi)kY)<# zZrQz|W|giVD+7+>;p%I+jjwdA$(t9b(I|GbLc^_ftpJOwt2Ln&$a%u4vnOF4Lr8?% zTr3MJTmljnDPWgaSDFYjV24nJX8xqsQ<{RtyJ{FG0@-L@^#hOb1OV?F**4G^#{lsV z`eZZ&N0>3tIc5IVDJyS4AoBh#0d&s~0(lB518BdH4X`+`&mEw|BEE21?dI9F8e*g& z9-(^bId|khPy%T|mw?Lvdl~T&)HeKFW^id*0XHAd0~~VBaYHFcg&Yxl=V*<%R)~^- zk~Gjsc>Q`a-TpXsao*P>;K6H$5^dPK{^XL!KUa`z!$heK zyu^Ht`JJKSi~N;53~4#mgH=r`#R#lMgwYTJa3X^?Ga>JQ>H9n`M}TI%`N*IPT(O7& zd;d@bl*6!Kn^>`>#{5&VEr<8)d&q44$PH+-rb{~Gszad(%606`^x_JUVo2Sc$ z`-`6ItN6XY@9CeRx?`{#HFbUb`+hFXaIuPLTs=qm~H`3F$C~B&htXY74yEzlzr(%=Cr8bNKB*Ag3Rix zgxm39vprf)Q>*-1lZ4GQxjLnkaA29NO;n<-|5hr{i06X5#ITGP^+ursq<9|JlgQp4 zr8H7`$g`c$CZR+myXNYSd6Vjrbiym{7KZe?yxcJf 
zPCF3Y?0|Cc4sp6=umqQ;b$$B5vpTa2ixOD7F>zVFlQc;sfW{5bonG5d0ZgRc{eS+) zV;kaMplS)(Q&R~+uv)9nq9ii})%=D-7dVv{3P#AtJQE61q-#$>T=7KE+}x;<6YAbe z^-LRZfI`ur>^yvC3pC7!%@h@9P@+(YlkIt3ttn7fT75c43s(y=ib1sv>Isnr5L_bb z>n1RU)m$K#uGTXcwL1y%sPLgXPzcYPGqn%G_VToo#2M6|cj_gXdwCxvj%Vu?V7o<# zkBK#f23nc&uCw9^oLV6fqz~e%iK<2D$aG3YY55b$10}1CrbP~uVaUt@Gq`cvSkvM{ z!*k@@ga$}pTKl)1nnszf!14uTvK~QEdB9mNcohv^%!Wt!koijJ5Ekffda ze*y8XFDPY9Z9XqUMTeF|bgN9iV_7)`H{ks(6eI&cvqHJNbl@tZN|m!W5s8^Y31|?9 z=6qD!t!D%%KNRo)Q#HVL2`o$Wmq3-&CtaR8k-aFSy^}tK`?{t{nnxm*BZzS@InU4ML*mP@&U`J z`9!`QNXCg=ZIH^dQEYK=qlYeFVw}Cl8E(AhDmWY`d#231%1Gw)+@8AfWvAK3*LN5{+2Ix@P70NqB`%9^+a~^*ZmXnk&fxiZE$-Ve zE^jnFsC;6L2GOSb_Y9in6zm(E{qC z8XI7Se+#zjEVxiPeeObuJQ&RBVF)bXtefv1z*Y@V@-)=Td?*%#^T~0BuPB_|*1`=l zUV^W<=B$4m3n!}Pf}@E2_>+`|;9~4=E3-bc{m;vemuId{Vd2s(PmJgo&vfki@i~@KEnK1E87x#3y^(M;#u>R49F7L6QQ%KA1FEmvs#5lQ6SQ{cY%|0g8M?4mh|RU;FZ$zrTFmm8 zukf#SZ%+2QOL*y;GE?NdU}v@bga==V4-aKqtcl>`Va6;>f6@5Fmm#qatzALc(9Wx zw7FnU+41j8+A415HMv5NsPrf9yJptSD#!E}zV1Q+l4oEGYTTQYjv@ySfI+(|0lE<@ z5EI`K08tE3PY`LL@DXL_fnWNW!=D#k%goO|u`N-2A5+m3!;UciYmM2$BVT@>ZWtH_ z2I*JPSkkV}(kG{*-rFSLeGTWTMx9~M**V*C$4(>t|Eyl2V2`83J%FPGVMfdV%cK0B zsxfdc@dw0)196;T#tXYfzfWe=uGP?`us@l!dyuUONE!s>PWnUEP&wE|9YX28p$fW4 zN&^B+f=TtZuh5J1UnFQeSAv<3r-fG$qNPyrz0Q)jNjNVL) z*cWSFERdR-&9d67Wz{+5_M#biQ=l}3)RXDRSqan#O_zw8mf(cgj!6@!fes`VhP~~) z&qME&d2X%;iyv@FT*fb?#6U3DDN)^E3=Wro9E-C%q1_eH#P%$0^k}|5Ns>ZbYqW_f z@(t|2@qvr^1=)!cbc>Bt-2%EpelIHtj1`cugs(0QP! 
zLzr|07gpJznjQ-C7wB*@?iT8~ZoU0=9>(DOOe#%2 zl6-i^{ON)&{#279qky6Ea@w8`queDO<)1Q%HO5E(WgNU%0t#l01D`-t`KrbQHwm}< z0_>R2n+s--SPx$^m76@21EluL-oaeA+C$}rhwMi>SamV8@lBVOT?Tx4m)$Slp2sCh zu!z}{7ZyLTRJxkuR_9rATCW00zp^KweKneLi=}6fZUC&-cZsn2KD@N*eKK#kkf#G65n{Q5CtS-gbNV8z_#Y`RGM9*Ab8@yv^ z&etDkZf4ji!ObSFWvz2TKG84YR;{RwBCP_i_1I}tSlY27C~^UM_o%{@lMa#Rs_nUy zu$3|Nx`YJ>P6310F7LCSpUV2J-27|w%lf!!ynj@1^FK>I#>Kqa8-(4wW({Mp{*=Uch85)eB6|-!#OR zk|!8(Oc`=&F(MunB)DI{z6l*kCAZMlNa{X)X?~%mzW|q!qhHjJ` zM0}(aK_j&11Avx*YF?23L&v(No=rNt`8{Ji2sPh|L6A2uIK!n)B5#{e{*$T6I@ZUx z}}{m@mXkXBEx`s{=Wr3F9)#*m6PAi>jkU=1~cRlrISa7rt@mEzZR8_=HPXuuAk}^&Y}MrijL6lmm7b3#&tNEW0LP8TPdkLu(B3 zmFX!1v-yX+{1e#w0=;)Wj?fgmsxv|Szh&)gDek8m6mT`ZxxIUMEScl;Gv~IIEzBaP zA~B@{T~iP+T2yXgQ5snIC^5=KFf!s4Z;#%Lo3_A=*Xrs5B{-pY+4xX{g{A5$jxp1B zwsby9BchIB3lqbhC^v5ViF&T3`Kj5*brcm2HM8Ya9&cft;R;XJ{g=lcaP61}Bhf1f1$J^y3 zIwM5vy@aPeN;65mUqGJQgoO$rpphV(had@BVN#Fb_jQto6jeFM5&Zs-$JcJp_nx~D zE8TDQaFg3JwM4nc8a8(>j;V^`->f{0Gg=f|_~P1*+3$*(Vu=v3m2b>ubomt0*Bp`v z1fMnD&sE|*nrOj%DCcv=?hC3h5r0R^BFIu#U&#EpKq_qADLmf0&_ll}no+t^mHvP@ zFkXr`=e4R*n(hr)eY6L+{2m4lBl2=F2C2asZ5I9|op@4)m`zyc;Hty9Yv|ttE3UnNs7O?saXk*ZkX3_39MCu5pE&b@PJsjo)aL|Dd9CVaQ|$FggJx zSP2oum2#B&7QNQ>*65q)P<0TIrfG}ds>~S$(`3GJq2Cy`!U(Ueh@?ARaDm=WM`ZFD zfP@oexbiesi}&#oOXjaT3NG1B)K4p!acK$Ft&jmfzniTTpr|#gS`&k3*3r*&b@0{^ z$>p+hhZ%3qDl-lH)zZzQvs|b*2^gSb-@8_(n5|W+*^CZXn%Z9c*plW{|+a2AZ zPA;o*bpQO3y~s?RbaP4e+tkKmMxDCGS7M(an0$R)*>?ReZI1FDxiG9RcgYsz_VjbH z5e%z4c>rW!;^*22Je5nrKzyp1X-(I#Tp) zP3y(wXl~$v%zHiMDze5xCrk^{i4;|CRj;40s&8!eKd&nf&5^lH@;+M$i#E#>;bDMb zI)3$?w)*rX_7Kbl4_r}p^#zsTTbRnXi{75OMkaK_C3ud9n zdXChw=1dlgI<2r~gIwzoPxWle%eE5&r$Fx4-2sD;rMNGJ__FI*-Fy68MK3ZXn9M|o zUI{hT?z?K2Uns?#i%qC6boJ+7oE!Sc?Q?-R1p*wDtn35cBX%l{U_^2dxmJ&&&@JX_ z)YKW+fik%QkeXZI)eF4_H7W$eeD7h>((uQOGqh)YA9HGbX!VL%|EuIyM8M&-s83s1 z*wq zyOE(HG6F%pihNi)kYQDMwDVt8aGCR4^_71Civ(na3#2EQrd1%E585MSg2|tLa2p+6 zj(#Ho&;vsC!Q7=ii&O)!G6&#G&qO@x*sym$2fH);AkHBiv13K#a0FT~sno+TGTX*n zNNA`>?^0s|OH{Keuuf%F$|Gs*{kH5Fi_fsvI)gF#7*)WZ3efAXI;2-m#-wyz?+jJo 
z4zdnsk~(nu{KkqaHnzs0oo+T#KIB=PqS{Pk{oneS#!{uSAXcCdP@!V@-D3|mvuGA8!pijEGImYhiq!!;tRyJlI3-G*(yH7F_>ofc#pPR$kj@Wu3N*eI*UMt3h0mI7gWqZYx) zqJc<$%3P$|ZR&OQWw( z@|x88-0e0VKG3%Mgr5sX%TmIFsf)3gqQojatTocKzem5MSiCgzx+&eqz>E^WQQ zah>>@ZsE$|wpjga_KC!4NP_>>LH-b0vQi6$&<)dty9lKvkVG0a?-hXkb>l z@SZ0@dh7qDRy;_~vhMgE(2ft%O%^OAs=Y6y7h-b%De33TI!EEj<$i)9w%-%8(!bmo zJH{3IfFT8L6I?^kudu2yQ{k%I4!{^^+ragx5Bw~g_ZNTL<*3X38zPJa zW0cJr0Fy#YCR3`%il7rz&_EYOhe~*xm(W!K8p-<@0Jji3NTVBwA}B3dRjFt|ey1rA zh4_scep5}aD%oZJ?=Pcm(Ucigy*o7SvzLjCMH%ICXLy`#5cP3bZTl*Z;$}zLShIToszOnlBb+i7(iw(sm zxY>o>U0X{!FTJan2Nr43itV-KiCWr!UR*ON_>jH@9;UFu4zuZH&{h{jRxipD7PPsq z)(uW#u>Mu|{*M-}C*|@c5)xPUwu)QZj5nm2opq=bGg=xE zqkI@RWBT7&B(R<2Z;C?uSHH_vCzOFHsxsDs;)L9yMJ)>Pl`Jqc$hf ze^qtw1k!||3xvLbKnhJ8c}?41ttl^+&mtCD{O+y`;IGjEu1Bwg%fuAy26yfmlE1Z# zy#DmS=A-fTAH9*wKaot2QiTWqB-IAxHZQDiL6^XXgDpSt&M#d`C7?C{9}Xn-_NZdZ z9}Ymi956YQ{SR=&y>|m=|BIb4T9oC_m|f{|l&a+Y)G-;f^*--n{j*_fl4}RP_hw`@ z%RzD@UcbBJ8L3ac@J27}wT-aXCdh^E*~8*^EVXFPc+Ul#=Byu2VC!7+eO}wB%*2d8 zWJYh2_jJv+b7?Q?78kra_3P#816}>IMpb^$FD02G9}p@-M&~gaMgh!>gXP`n9%kE7 z-N@ND6-Reefk--tcagw4rhLfu&9J!5hs8N8!N*~^?lFv7E(0$(5gN<+@qQ$Oz)Y52 zdu^gtN*W#8+w{26uT&k{1=GNyNg)Ty)Q~xhna+Si*_!2Oq^Lq(T`u z%`ZCMB_yV z2s*)tYfZwZApDn}3++bfJHYVnj(nj+v} z4FfKOP34^rYtF5Y1Gw_^01HS@G`{CT*N6oVcxn$+N&y)P`izp_<~q4=fLnC{%}UcW zbCFT}_lHNMS>byt3=8uGXIE~ylm{V0rN0u2re+puT|?HJp+>WUQ?AH{4ZL`-B0k`H z7u>dM?E^^ea6mpsh@6b#Z4l!C8O?rscwh-(2T9kMSo}*DWkYG+_<<5+4kZb|34xoc z$rKrTATbuTS%Tk+&R$~FpU7a)(k=@D9m;_P8C`RC*8T*V*xy*%&wACh#ye&$ub zDZEY4KO!l}Y%|!!(oMC4VjKO!pU=q6&;kRD=ZI^I+@qlfU3(-6BO9bI!gtjVxecv$ zG_;V~R(+6{oa=q+4X)PqFc8(x`t~f;7xyqWh69j= zcr8I_zKpQhhd|LoZ;2iKghv%J1cFCOR!mGx{$Q}yEodCP5P9og4w)?NmN#FI1vFVK~; zc2eIE!MOnac1i(Uq9}}dZ82GY!rZ#9_SwF-Hr!reZqsA@$!F1dF1AXQ1!offcot4V zK9?Nak%caF4OqxLfIBjGoh5Vwvp!~3>4NwS=4%oZ+tKgu63!x*fN|G}ij~hq$HjQH zK0OBsCPU`Md)IQAoQ8yMMSaGXe~OAYSa)`*K@hu@Ue1qp%IWlq)o|=|`Mo;B9EeT1 z98nlNMbn#8jXmrNx>(>Ufk+L#z#HPwM;7!&(pyA+0UqYDK1RaAs*%2~j_Be|ta_{_@t@3T{@@KWD* 
z@1cuZSm*Fgu`}Z8v*+N5O3m))%5FdM(3Km}%M3Tr3}V5GQrQf_O@F`S-=fS~olinQ zQbC3>U;w(nZ|#0T=V$t_mDV_(u?1J^d4~57nti7R8LOp`Ixj|dVydI@@*hCke-RS( z$WYJeG_lLjpFdgM_g5jWyorvBVR@obMMN`rI`+$@n@120J68@MgYUio(WloJ8uoGI z4~#~W$fBQWz6Pm8-ybPB;E@>KJPWAKm4Jqwha4rLZO=Yi(WI7WfYnk0M-}BD;7>E& zdJ7DLv==!**eSap&LZXV`RkV~EfQclUg-Wjyt=Zk)c^B^(_mFYkbZ^7hD{CB?q>Z} zQ*c`Dd~71vab`S|m16Nq2P2SsXZP3i*SWJWHr^@OEkS9&*6_M7U8e|UxGxxT=&(#( zN(zaGRcI8l^0c3bGibL0CW02&oog_m$HWgNjp&s+RqDD6@$lZWo%=WQ3=0mDjU$x$fwd!AJ29kCTKm@CJeX{XrOG26IIKbA^ zxqR1r8O;mD{m$OG8>(8oI6H(#^oHqPS9)M8%`-On@3|%}jxb^VcJSZ5ySrP{?y-)R z9ohp6VM(<1hTOF#!GaszFn0jNIhZVs-UQ9Qby4E@VW50Dg+m0)G_;vqZnI&8xRcbM zSo@E%9+IUDRtHP%wJ^2rRE5~a-^^F~dHTa=al3RJCY~kiGnsg3qK^~XQXWrOIk!B@ zfRk_ZCmR!#^q1W!eC87UY8sB$RHlbw9QrB}-5Dj^ArcuVC1wSOzim!MrUfkPyvT3l z>KnAeP_rN$wY>!iK)!o;z^afkOo05zF2sArt&(wI6p7UL$TT)cy*}pLyDe4&^%t_2 z2myMCnQJ&d)G^g0zGT-XL^LOGz>cmqjANE?8ZhKte##M>@5s3I3QZH$9|T(nfBJ?I!m_OoD}b{uHpfov+*{&$6!SGkkYsJ4***XZw&PGzB@o$!cOmtqG* zit3OIAMtd*#|QcDl*8aUG$9b>{q+IW-4FH+aR4!aCMXICR8zSIo00LBep#RJbV-!3 z8%g88k7@B0n6m)ip+}b6v6d8+sW&Vo|zJCWO;>6D{#tKm(f? 
z`8=m){)q>N>>T-xm6z16}Kg?IwgTpGx&ZgXJbImKh{geUYrwNbxTYnYq2ZD}L1VD=&pn9y)84}lM*HxQbIx|KTKT%c8Twyd*y7ihSwi9c<=N4vw9%~!&0`E{CZgdppknisp$wI&X2BC|Y87$o(^k_@Ia z<|azqZAU)|gV*iXb9#tk8zu}aj((Jtp>unfHch99xGJ$qe1w8#h z!cq$sD67aoGZJF3O|P$*-6xvzzT?cS2LutOWE)E`~IyXSrrJ9~kP?+vw5J3yA24x0gJ; zLg@BSFME0oI&GG;3!s}?7qbN22JbbLeqzXQMi&&Zi1BL++NJLgrE!*>-QM#L20le&E~e2tGNA4E0bW zbIwQ36#TQL=q>PShmj)H^4>QGw~M}bY3!eo?k=~_|4fCngtT=*G7$wZ+<5)%5k~d(L`0vKw_l~+m}b6oK6KJ?uG-x z*0)uaG=X1@`rBmh@!Z}uKR;FmdtK;2OmF6gvaKvp44?J3ITl!_9vtLDmD9({z1bIyjPw4%^%8=_!Dj z*v3xk!lLeEB#2t?rr%niO(d;Za{qhu> zgyxJkpoHWDXFu939@BSUa!k|Gn$hioXY5+gHV(WGbk{!FNN?x_cpv-3?s zmeOLm&A@`%hmx^~x)mIm#m^Et@vw?l*wlrDlOr1AO+p+eYC;sTW+855+Uc=XvY9IG z)I|H2T*JXq{M>A!6#lDJhL28Yu6GEx(-}F3VuY+Y6!UYBUhZFbZxtI89eud_o!{I) zz}{6;V6rT;KLJ|+6DvM>vE1?2YZE65Na9*~CNB!*_Pc&vs=(k%I0WyI9 z2aFu56Hka9WmGLbZHng_@cvtIAW9XO3H7WJgjg?A zv4=H45_D=WWPW2fO8_tD_S`oGIDeGXsMMRY0D-bB5{s4B`e06Ha|k4dusW&W-Iww1 zxA9j5L$qd5OZubNX|A04k)ZI=m4vkap&h)*wxD&#DoB&FH~(hcYGFR69oO~N70E#C zizOVG@WkzIi{M_3Hf~_1diVbk;O~#QZP>3;AbY=&Bmk5+qxIPKlKnoqMrNM=}sS(G(-~f@eiq5115zK$Vyv7q#bsXF} z>D|8G^DiOo46Hk77{1GWsWA$?omJv%J2zDwCzaZoPb7B@`KPB5l5Nb7W#*2 z2U}b?3TCL$%So06Zz4yTJN~*QH=`}b^m7MF2PCWzu>hVR!QXx#5Erky6g?oC*4Wg^ zdQF8pNFNgKrR&_@^y}PW-Fw95RnrwMZn%K4wztT^0D8#K|3UX*yma#``*(%vOJy^@ zoxoj*X2T$OJ6C!ghAkTCq_4u_sJMp0H?&yT7dR)#d|u@Hz6XKFI4}(@2LqSAAdZm< zXh7@^6hWj~2YR=nbneMj@;$3DF6{dOX&#u4u0|xpn@v4=*-t#fwqjXT!#m?GAs>+P zxipy`&)mAhC!=*DuO|8!92#U=VH^Nc1aQ8Kwl_lPsUWgX1Pm1(?CT+CCt{JP6$uVe zPE|OFp}0N%a@>0PP!Q*9z}JypAMrjwCk0vv0XmWsNGclRoX?C5^yhTCk*63_RWoXh zX`(M8b;S)9a4cv7$tE}!(5?9}C9TW9qJ0oQXUDJnA4b85`-raVd~@0wQJDJ1JX}y{ z79fid%oF8Gd@N#jHQ2)uQlqjv)7xR*9Rqbra_2PYZJ3bqbQ#4+z>w+)z;&7KILE<= z%ISdteBYV6V2{2yP0c6jt{bZeNpupsRLag z?8y%X?$ns1zXD-`FHYS&kD_XjgJQt(YoKch6*T)*V)-+DKfozwNH{8&`Vi@~K)kpg z2f3-)?o%z`6j2UV9zb4d>k6i;Zv{&L`0&plm8s*>H>%e<5@(H6a8$Dtq*?y!6v00g z!g;|GZV!^NIYMp(oC3tUL0Sw{Ex}1xm|P z<6f5#J2)!*m>7}j40$8TZScrOe6Ls1+^-YC2w}~>5sqyV=NeLSgAOTOQJh0593K>! 
z`~6`$0|5)~k&8Gup%MC^n!M83fTa9%T3Cd$$3$oJO;jvGo^k&O@$pi6HLu5+EV zX*Yw&DSfD*HFO%$!VL`MrL|_Z?X`)f|CC*!SV|}LZ}`~%7b)cb zR>?_TZ7X6}2pyB~>yPTz&b!Gdh`o8UO6R0?h|DE!*7%0|+~>El+FHq2Z@r|va&l1d z|XrA$h2 zx(q6xu_jy<{#}TTtwQ-j8q9Di!7b%BlJqI;R(GO2N?Nj!`eei8$2dp7V#VC7Mf=9of15;6>=>$QbadG>p+oo z|9r8-RL|*KlNN(foJlXmg*8S#;!p#+NvX&0B`{GuZ7 z)YR0GGY{k9exta~e*JoiX*e2~a9Y@m$W<66_x1P34%Z$%a^&1 zafuS1jEhoaX4Bm)?naP{SWo*mn_^Y5eSI+R;tJwY67lE{-uKJ5^??KFC(>P%ZdJnUvB~Rmtlex@RFy5ABprOHoYZaYF@xtir zy{1qLw3vBt!WOmH6Jsq1>uMZ$7H99t&7XlRu){DW<%k4wr1#jXLF@)1wfz9T8l88BSG;iz-WSZN+rf037<}p*j`3P z8A+kF58=W2-6`EV4xF-I9L!E--9xS{S2Moty!^E%cIY4tjqvZ)@er89eB7#@p?#bZ(vZY&^;{91dpp3c-_J-jic2%~G>`Dnfpof#BT}b*mG0q-) z-&94x8II~8Ae8|irhH*uo9j%_JAJoO>a_i!TEZrpH|cQs_bxnp_UyN#M?IfbX1TAr z&2HN$IRtM#&W!>8{{4&f(?c_kXMNFT2_NgeYMS~HnDo@o!V#(jFktSGuyC#=sM`VV}@ZA{1jT15n)RXCNXDa zXVN4W@HU=6Us(m>Q-jHK&Q}9Z9HFIW<`23uk1?(q-+UyY(5kXhI@_W1>dF1eI@XvD z`_CU9(G#9dN4eZi32Tk6zcOIahE^(u2s5nhU6WF z;cl!cb`@2Xd3md>F9`Q){2`#y+cE6 zfWv&kZrHG4sOR=3t;b$@$!!cgh)03kI+|zC{ybf9(731ykC!Sg zE|#UuQ|u(d^R;9iHJT(QYG7NXPwDo*=$C_CMNzS$2{y%1bddKZ9dYbeF{K64%>!i260 zKJA=*vOY96uNwOIdr_3q65VlDGmw?&42QzDXubxSY z3@-KRmaA>Gz6sWxQEAK^l#uPgM;Dh3^fTWlV8}Hcr}YYJuC`zH9&y)>n)2z))QH`` zJA0@KH_`XrOXl{)xMZ3)f?+}y@-`n8B7~Z7?(Rwk^rLNw{_Y(%7}$iZMqEGcePJsi zcv6r*(bB&Gi=kbdz&iC=@430HQd(Lu-dJF9iM;ulDAtTF|CcVC z`#C?695I+`Xjx_Ne_WcW=9xdAcu?o7A0wZ`r1m0Y#TmT)V=I)<$=(b|thDM@+U(f| zI0YXmA^z>YrkFl2k%CdMI&Ze&7`=Txx7QYI`r@E|ObBcF{O+OM0fkbR#W+Z9Y$wJn z!t7KA3Tf^V+Eq3XW$@{=ou^=&pq-5eS4{L8n@iZ0lMX}9Y~08jRhUbx&fTgqX55*l zggdT6(_jD}_Em_&`c8-&DuU$Fh|TKH5JNC=_l02wdJd8a{(4jn8LBVMqie*&2%8^#fv#v}F^f-buD|3s2Jl7HL~zLSIx{pNa7fT-Ug z;iG!4^ZSkub<;@EO$z6WDi^zMjFT+eoe|;`|_Wr>C*Saw!MorpHGjU%c$bLX?pw}a8p-Db zBc1tWxca0WvYN`vFIEO|gy?=mb|67@602QZ0l|fOo8jsi+_QUkgO0i?m%51Cvg7ph z^i%L;(49Poe7- zUM7acDxLdjRIKkboQ$^W+%mC0n=Cj@=in$pIhD--e~z#Hp}7skUWR{fNS|F@ z`o6u9bGy0%#62;G=&6O_dVL5B{%r8d(iqQ4sEN7&TS^F11r@}+llEwvw+M*??8b&7 z!=;%lmcW209te-vd6!>2G3Een`k}ZLjhhfw?EHID6Zmh%y*r=1C$*PZP3UPOHi$+Xa 
zZqo^=IXGb(SK;DT7+qck&Rmmn{z1T=f`_BrdteW71>3qnU}|nUI|ZUsxP!w((7QDOf4=Y#i&C+ft> zX^&yM!8Q3e}nO({O?nFW0 zN7`z10i*1ZzKz!zvW-woQ-a9^*gXbAC*9TZ3FwJWgu(tsFo$DixX$8EyH>-cr%NiD z?Pn)5zrG6Oa=|j^a`0naWba%KQkGBoGN34x=3%M@vWizP|1aj=Gc3z% z>l%I35F;8aQHW8%hDcXX1f+?DE*IuK1XOxe zs`PKH2hil*d%yd9=lnVMb>)&M58Q3dHRqUPjCu5)OnG@6jigPksj%heOI%9~?$(Sc zo;w#GxPY{|&JgH|9Il%sxk3heN)j>P$TD>)4Q&x{el|=xSg)XDxm$rZzJ2=^5V&tcQTP>Sti@A8wbh?ip~FdP(NZ82 za{IY#y*km|W(7sW9|1m)`=7V!-nVa;Fu>_nOl64=tA}cWmRg0{3HhTp8F;v7XJ@0V zj;&a+VhAu?DO|GJNzvP_#Q~OTXX$|(LvE!-y|#MZOtx9Ubd-EeL2Kl(w^UC=XC3qN zLc5*A-!7Gse?h(Rkb2`HAKLPXK;uF-d+dm=r+`WIEcS)_61T}7(>Tv(T7J9|`Z80EQ(3Rsv{Lrx{oOp=+}u$sDV9g^-Zzwz7Ylokp}}r& z8qW@!0G1)kfO=TQEk0(-qw`-{4`#wxxeX(KD_3In7{!1U$)0kS&DpObBYPQ55pn!r zOj-o?_!*&El+gE?58wn=@w^wtqXA$dS;0s9O{v=LSCp<+Fu|#MoMzQ7L;B3NmhaNc z8s+M=z`ATlyYlVIa|VZJ8XiA>97P@KvUuH|j1yB+r5LNjMb;apJe1lb#8`X6gOz3l z9UVVor?02y{g*f3LuNtVp;ja~wVehrTbSz=+PC?&yEzC+HL|mPl)CdtZd$Y9VvlQ| zNNR-*vWAi#VOVm5H+qa1Xj2bYZMS(+IE5nctL@rO!4g%-e3 z%xGdyOzdW+hY@57-6y5bXzuRSH3eSp2{Re3*jY0>wmvSFXd#?bxCDMhpK^!KSe<~AF=5A#O`K#2KQz_m)zJwqXAuQZPUGd=xI zXJ_XQLd(`s2o<*5aXe8cDrM~TCW|8I;0@r~kKc-N6zSKg3D{n-U<4#C3lE>vDA%Xf z?^4XSNzkQRHZ(M}j)8$eBt7MHAsqn^LEu}tZr!?5S+z_KeR~*>5(odP#}`Jrlp0b^ z{Wps_>!7gw=l#ex?AaWQj-n*4oE5JPpRQ?7&ciV0NohD=oe>fY7P>Coka0eoPW(l} zMbWQ0hr>L)su(`WVL+TVbz-aQ)A+ROr%s+Q9GSwdqpC^20i}LpY8k>D=jlt9%cu+}&F?^ugVN zwjs+lu3IBm;QT{hj^oFsXJ*=4pZ|&N5;T9hC4Y!IW=tb>#De_WVW2N9Esc>p^b^y= z6{8b-x?{9bW08BxB13FL^%dm~Xi5t8dQ3cMchYczWLvv-t@6XeE2;_-Mhf2FShqLl z;ojfpq7joUO?5XleeUUrO;5mYu;Rr=8<&4*;g@Cn!+6f}686ofj_4lRH>$BX!4OaaN~gs@!cAxAF@&*Ns(x_>tvjn?pA7y z^EX`oCA{xL_VE`lUm9Yg#&ZSH${*&xy#)%@FLrKJYAgXB=nWcTAl2`F@d0{1hM`_? 
zU@8)Vma@YnN#c8=j$YzBtpnLG|wkVR0$K?=BMHgVj?_YT91i z_$>t*e>*%<`MUS;(~3*Cs4+Ww>eQ(+xd`#gOO5WT13hn6XW}!uh3{cTD$(w*Ut3{Y z$at}7kDcZbq}0<|1`!CnbTw{&@3sWkvQSn#1GuxJ@CcVzxyvVT+!MxgY{51d~*Yb zjIy#f^|#)6QC==jOV7E>u*COW`o@jW0qB*r#U(xCWi*9W-3R;9F*MiW7^I~Ykfxv; z&}(;eiP%lrWy%aOhb1MAe?R;bciRp_LPkDqg{|d2T~jpwyoE=rRe0@3FlzY1?6jRA zfut87BKgxY`_iRLQU(SQqC5`xHBrHL(PIEIpnzW6EEW5@VE zJSA%d&I(@ewdO$2;dEC1?O{u6hA`m?o?mBYD6UVaWk^0i)`eeWPD3^xa(J4-okHE{ zO$gi8ty`_&Oh8R@T63x|41&kYL3)nYE(^2o+68!M`8P5>Ey^?j;1_9~mB4ohd6Q~# zg<+kxKpXO0AM{@i!Y|ics21a$6B#Uv?;S|rEb3TqQw0+cAEd8xK+v`!G5bg8aqSHh zb9t5nvT$XJp={OTs5e_)oQ z%19;APx*la4X#wN^J$#54M8P^|S#9|7<6Pwnl7 zS!Z?WzEL4TqgshWhYS&;$j&HK3lB>MQaF5@m?#E#fnW>6QbTw*k@_L%@9`(U-*%`? zI`F{7#A)Qb*pq=ML0hp6#?SuL7vTK%vB8$4KW{NPemN*DC%5;*(^&Ncc7g&EWMlxt zGpdeUs0}}MQE=ZDW&Sa(b3%7c_Rs!0RH-1+o*5?8t%yz3`sVH1e(1HSL{gXTmLE^~ z#|z!MM)iaO=(mx8jIBPsT0w#xrB#l}X+#NxQiV1YEww9OMLbfDlp)2D=HpXt)HY&* z-!3JIlDk|SkurW*;P2kOGulbklF?b+49kPClwW2CA)z54peR#$ktoC+Aj}8T`J0b(*p@Ff*K={@&*Sa zIB_597QSUEZ=MHi=e^h{5t!S5yb4v}_PByhDx4&BzW;&}VkSB(C@bfW6U>XLQ~uUy zn#Yi6C^&g_q4_LNayap0zN`vNG0KrQ>0YsEJ&i_!AI74(rVINxP6w^Nb{I^1i#koB zLvTcHe?s*VF&f(SyZKD9rKh0fBf>j`eMc5APb6}>A?zwBqrfduAp2n;ui;Hnczi#M zAJK0T0WKU9A7j1R7ln5*yp%_qZu7ROgO|#B!;MUPVH+biPivzweY7I(E1xZci`bvA#E*|z2{fstMU zL2_U~fhY%rP$ajg0!lBWT6oiRGAy4jzG>tZv>KtN=sEorPQLt%rsPpz&fOC$?AQTx z=G9=}Z3n>bha{>PJwC(<M`at>r%fx2R= zq`2h5UpT?#)KsXJ>@w(Bc%kx8wN@_Pr>M5>%XsEb*czDFbgHmVBd;0Y%&Eyl8yER}} z7cvbqb|K+M@c)c5<-$Z|p`W_!UD75UACZMWv^2UD{bN=*lE(_e?3FxeEJ_S!kwV{i zRO0YPC6^9lV@b3-{#L-|zl%Qo1u#@~VP6u%dScsM44a>Je3k+47d9Lj85wUpyJGn8 z$+MCIt{87m8#aD#Y}aKrSeBcYQMb_UC-1pa-a9Y+Y}erL$zqBQZn0{tsx($+5|21F z>u&l3=`LXmk;xE|(BC0ya-Fj7DBGx+WfRk_5r^7BkFyI7z8hRzT&m=IQN1YN7hl5n zBn>$eGQVQmxjy&?$tWs%&N|>~74RD11*mZ!z@Xw5ETZp6hTx4GH&RZ%{F!k!#ctRL zX+82bjQkOG$`M94Gis%ScJ_sYpML&X1}|k>oxTh5FoNk2=uAgPM{c>LV~zW9Zx7~; zJ5DoiEYH)W!2G1C+| zBqI673g}1frSM0O9)$>6e^S0K|2CxU&(%kz=4SsS1p6Uq1Om8SLwZoMBT7VdWoike zgF9wV*Le{MZH&g_huAGOFsTOV*w)EYss^iBWPCmUT)rA4mPwI)hGSROsC>w1YunL~ 
zZ5FwHWh{55h*=(GqIE#=>(~m_*$Z0Z^JuS^o+}!!^8g@P18xC{oF(O zdNI})J!6!STFWA~7_t{uqsN>pFm2fL;#;+Q>2_=MC3d1c;*IfPpz7>%nzRW7c!WU! zjW#DZQh8*|{*2xIOH)W!*O{-w!#>E2sI}A!dNfP#GKttrp^65K-BbTKeAe-~acS^= zgQ7JvndQxm7AB9kjULdLp#%9K0=MdQlaD%#ZVk{o5%TOr`wDrlszwFOc^-6P%78CA z_T8BqQuVd^!_2FqZi9c#gVg!G`$ zL8ocXM;>*rwVJ0h+xkNX1K8bixno)YFEq2pM}HQ;q}@7e4%TBKZyQ_u$E9D)Rtv5$ zaXI7YIBi?8IzQofoF12EwsS(v;iapEWej3308uhRoePb{EZyqI3e0nH6)#o8)Y3L%m+^ zyo%D3qm0GDtVyAbCY=L}R$Z16>vy$s)Nr(%&Pvn%TE{#YP;l@0hRDC>YNpyO0^;0- z8~SV;vd(6ojnGKD6u39O<*jOlE_8pZ44zFgY11OPLX zcTJ6ZN|Pritv+SktvPMgBURor8zQXyBulLD&W_=S^;@=WlXw#Pv*#=QAUhWe0B*jtXo}tr*%9xRLc3b8W>2+m63T zf5AU5?ercmNY#o2tgOQ99>Z-Q6U-CZC@? z*IsR8ed{^Y(tO288HN8# z{k-aJ=Xy$7R{A%Q%J5_R?LIcM$`|gLer)QuH@N;-kHR_5V)npQlvdw(`p|T(`=Z%0 z&vq&2>NsUKzE%?Ka$Yf#t>O`D*rQ_`#ucvfxMwnR;7gLDT1)Nu3DY>mp=WWEfg5`U zvAxHCrzf|nTS|BI!Q0K#t(xYS|G}#%{rb7E0K^($5s~(?z~B6Z&Ryx)!R)wOVse~$ z2SM%uX1}&s(x%Gu?At@RK;9!2n02%r+>9z~f#s>e#RUZgs+K$d>;5+!Y~I1x#K_(( zx54wx2q6<=b-g-S^7e-9_1z3df|1TeZ+xUGcSZ2e0EjUrv|2jLb$R zlT?dB6E;;&jhrC{{(EHyQY_rtNzUKt?B7$aFrxv{JG7tl8=xw75z0T=y5p#81k-tCe^J^A{ke0kae9D+80NvtR6g?B4usMMKI=ng z$39GKa~|SX8$w0Iw=Vz;$`~s-C zJj+lQ-|CXq$GPCuV!J!okY$t?>c8u_qKWAG_|7$f!v#Xpav9N`)13N`9fsFVbXJJi z>+LN%ZX#yⅅql%d4$t&s#{UL}oOzjGDOQzPCssINGF!kU6t$h>osD6#YrHI#!@J z^-X{FMX2X$rCgZNQqg^{&b7#NeHG&2^;_qiq9b)0Y2N-Q^Q7`>DxG}irj@N_K4oqtko zBKlsQ(MZ2>sUtVD;;MGKh1ZOi!>fw)`_-}Mhc65MoobUhW1oI)T!dYFzuA7Zn}+WG z(k~}}m3M1A!uV$vf2*34k!t7eElwopZBIp~q6D2!c^b-oBa{Hj4_?&N{TX-qSO1En zpD6-~zynI4J(2S}1yK5!o%})d-v2;|ttwX$^C= zmz<^sa7ydWXqqN9%4>8K4Cdpz07L2)Pui;vXp>Z39n&C+7~3uSM-f@KV-#6{FostW z%5jxEhi!g@hr;`n?^Q75CdLsuhB-IZN0K7P+*sR6{S};NMvqseM$KY$>$h;dUDQ$z zyGeq7UE`slDLY>7+toQvLt3IcS?@I2SqC0J@@l((a68!+bGO7Fzbt&$qd4aCrvl$K zzKvi*9WG7c!8e97b*L$cl@3t|L_TqT!xJ!>$W7UTo-s5BoLf6A2 z_9=m0uO{In^Q zyWgAw_o?o>LpxPm1n`I*-Jt5Qn3WsY4YJP0B!?owE=FE6;^MP*bfygX1-11SkJg)V zSZ~c0$&FOR5!lesJ4t5joyatlk1@m^G#q>$}Vt zk~Cr%%e@3n1U4G0QjQ|SbSZo$28v5n>KMR#%x;bw73{%_@TTwmaKDbOE(_^tpj<6# 
z0MuLFM;q6AX#O>!WNci-&RV0QeLg_68r})|fbZ3-KZ%ge28A;AfIHvf-9tB0Xe(nH zT~#VS^aFw}vpI-xID3rJIRvxXZ3-fzzP{{gB{m61Bqe(rd)=1Ah*@LU#t>@N8`;^} z-BBBun7A)rzU+<33GmFg1t;BSTOmSJ|a*d6G>MZM^MxYQe{G&l4R zwM#@dk~m4Q5sXNEF$~PZKARvY+t3U!x=@h%{5m;d9F|WOJ5u@bz2Jc2AspwS+q`DY z2`XGhX}$0jWM@88Y%-php;(01h)PBeR=2WBxq9_#5r*W9e3IU%bWj*pId=gXOQGWm za4;a?cfM?A5C_IcptsbU&aePt2_; zL6sV6^iGX(e7MZ-xXi@QGG|+qkyu(DMa#nI?u*98#xi{T{v~_+AWhmu9G_CZX|ZFG zk%$*S=J4UGG4q#x?l`(3-H4N5_sU#fYG?plGsU)7eWRFjPPfJ|`j%KwC4>t@dX4kI z*p0S|u$Hp;@=y_NQmdm$xS!D!%_u)tR|@zGcd7VuDxw++;rfanz)^l6+76|N2po8> zxdVG@^YimpdGs#L`1D}twt?=rTS}%wG-$;E+?4zVo$N0;zBZ=aRX+5nQ}f!;T$aW?2v+mH3d zEc2`K@%4>h$p3MDu8q<(H=mk+(5e{63nF=E^E>M;*+uQ4+Kxl(J*Vvw^?vz%5sh`d zrJLwN$&FahNZk(%+MKBIOU5OJJ`+DlpC5^^&KlO2^ozp6@2v*}i5 zEO`PtHX2-4u7FRvozRUii9E<$i~U3=R-?Th5HP3UP=ul)0;z^M4&6XoG1y$|z=ggw z0Dfm2K^gSx)(R$l7LT1#iktbSNGC}Lv=y- z3>Ct5Pdw@6iTeEeY<;Rsko-B;QC9xiPV8nOD(El;Mt&LFDTTJqmQ>SpkHil@u)YN; zB!$Ae7oAHswOC)9!Dd}O`)-znE4DPpL3UC$rH5D1*lx;rOduG0uUOQ40p?mJOYWg% zU5sLd10!{LbxrXC2qm*3LVI%@=f?JnneHK87I`xJ0N(Z(j?|Dp7N#(3Rxetw@V-t0T&3dG=e$qj`h-Hk1p#+hE*ST zPjr6SNxCZG*th~TX&3fHSBS0Nr8X)o{FomxRQwt^@>>Xp3<5^0O5${hrG6XxN+jUY zp(g7JX>>=>;D#>X!Apl@7&9tVOUuB`i8k9^jPFt|@#nF~szuj(Bmr>mGx-XLObjs% zY6zCGMQ$q@5LjUm!lsv#(X2lLTsAN<6&sxzA#KitESwJhNbv^+-YMI-f{75dLYKWU zGO9mkXH%k5X=ucZ>uIh1%fq9t^JZj6f4`X#KA@QRK~%Xj1-mynTtya(HP_y4$@80A zYwa-DQ+%e#+^e>Vv9IX_?7Uys6!*m!=T$P_1OSg>$L`&Vc#-{27FD!w*>Z$N%$T&l zy#b->Lgxy$1zNq_=z^q^G-{qplXk9Q8(K&@Hj5ZPcq|d%s|E(9l6wMhiW4yUT|Aut z?p7VT!-M|qYdKg9m_MpwweVmR*k4{z@Si*$0xbqzx( zEX6)Sx}8+ENGj?5l5QuWMzKvi8h8q%E3#40D&kdPVKDdn{Uo;(b*(HuHTw7!t_7=? 
zv?Nf&NxY% zu|v*ji0MO%t#O00iVB6F={D~oVL7=nAtne&0L&76Sf@)6+I|VRr01J0gu<%6QlUxh3&Wx_)_YGAVPRf3%fPB zwuTC*T?`OItiXXHUFmK_%T{&5r3%#jxcG9#r+LJug_G~ck*~g(N41?GonF!-%B1}| zk8V)<5-xrHgg)zr81;l8!~~>Ma2L@-F=<o+i&3(xj8v# z8y%`h0B%}L`01_{6T_WYWH7nd2G)@mbj7tc@mwG$V!)!IU2j7Ai1osoj`O(_fpl@* z`t=bW#xbYSaH3?4#?yx!?-arrwbI7*hhxE!M3@||!HcvzaXR|Vn>h(p7Yw0d(5n_3 zf0jCUnnRcnErS_ERIpFi)FiJC1_g$|t)ouM>z2iY#(@o?3>RKp0W^&kE7$M3Z&CH+ z{O6++_@DtiU8paYysX>G%&e@NVPRo(+kfmr=C28{5Id|NW`N0}-%OsR3wC_bupA&F zQ7iMksyQ`vpPB4G`pk4Zbz#2^)Zh|OApD&C8uB=nu35O@e|sF3fKiBQ>ipUTMqkJ= zUKFy=S&B}b0nzrZ+;V^&wdvFNFZZ9uye%P^-D{KQcGR>cks6zhFE+&} z`Js!Ms-+dy1reN)qtyte_!5L!dA2zxvs=Rfh7F_71UHm@hU_)#EJ^k-19b;sG$3aP z`mrKU1cR_8hvY8;LcFaPaZ;rg34TKlST>hf?quvkP3%_Gsi>velqse<)10n;N^BK?wNcB;0qzJ}*m z3|?9rw(@!WkX)4ONXUVU$BZgMxrk}UwUs^yelp;(a9|pYw%N>7l{4b!vYAQiSwtHCKkIt6!&_B(jM`yXyZS*NcOf zN_He)v8sWB){X8?Mpl*#SO@b?3iyXs#lLNgV1Tfp#dQ}skIKXFaIyyUPn@_*Px8?2`* zow7Ky`P3BD8@DuQHffnjE-bFU-RM)KpX#m^dxTv?MCALLFwdKvnJR@DVq2hb=pIH> z|7Fg;;HC#?YO!aq^71OfwrB@GKfw4=khx51ujFgePYsrW#mGF)fW1-p%1u?z7tNRLo0K9Rwx8zZyE)}oQ=#@$9O_sq((mn7 zqTTGW{qvnu8=xZqO09u)^0&vQB9x43x1dSW!oJ#mSp^S=Vs^#es-+b7e&qM4*G0nr+d-lA8d4N`G<%3m#B>tP-g3!q8 zcT{Fye^ZtM}@Sbw^~12IqPRGJbYQ zRZZl&)VH+U%7{--qGUc6`Bx+7(Hfi*0^}w7h)SD)azi`8KGmRqGBqk|BsjlZ;D2!?;YK&tTO2Kze+@Fw7sPV*MkK_UX$JP7hWU;1T08tw2nw>4`#jUFsFw(P6llN)W1dS1TJju7@;TB;wqzd6iBpU8dTVp_%J zp5IGBJ`C@KU96?RsNQ`E^N`z< zAygcIo$y+T7P1$X7!KPqSWo{nt}*Y9fhWOKc*s&=67R!N{IQuO12~8XK_duC9_g_$ zy5R8T5cpV3efL7a3*su0dbX+~*7{fL_7WE8`**7mq3UyyH-EJ?>L`!Xa#H}?n1j1%Y48)B3JrZxoTMI+h z7>1mz3w!>^*Q1_b#Rlq2{y#lO7GGad18YL@RQT9o+}x?m0a{L>k?pxoDLu2L4Bj#0 znj2Pmhk{+%TT$g7Q*i~pI{bdt4QXb<&UrSk-TSID*McEN50{Lp^K=+WxQ?E_!-hd> zfoG#bBeQ|~+?2DLSMC)DyH3#u>s76)i^ zDajC}{&y{^wQnm^YHL}}%Oh*(- z{0iqYx1tNx4@qV30~YU^?@oIArMYf}thHVH8XblNo=eIPmUSzg^aqIJk5HgUd|0w- zb8<9v*3JJAWLkY_ODVDux_NCOef22?6Qw^WcY3>H`wbg7h8ek^8b8k~ZF}~3wT$26 z@us^JUW$0_0f-g?BU{N2aea!itCI6!BE9-NxJ02iEDZdhY}o`tSJEOWvS`JphaM%( 
zW*m$cbOabopH=pSBiwvPhR*$5dE+9`zgf~ZYJ5tg`ay2w8p(s#gCd@S@m)z`$@%yMoc4rO;^xMhbu* zwRlSTqN!zf^q8yRcMpdKrg6-F~viv8sxF!wbxEFTb9i5%i@_|4w$B^ftpGz~d+Pr9Sp z(F6kQUpb7usk?DxWZ;7~ksyG4^V4PF`A?yHbfUPp7--`|4@M$ggr*9D1S=>-NgIT~ zh7O#!CqK5xRrEC?`44$I;1<+3>HFK@DszYPVky@D`Z$&oJ@(;9*tS{jXM z`9er%GaVn8p~nCK7g+ts6+6JBg+2|^6`H3vp+@Uz1l~~!#A~!k1|_*85SbyQ{8H#V zeJ^||TTT-OE>U2Dh9Y119vTa7V(azgizO9*U7FwjRlFf9rOPkOpnyZxHsg0ne&&uH zJG%O-tN)i1+YN^cDkE0FG9Rc7^~j$f-T0Q+f_~m1WH{2GoP-FRmw@WpJZs@w(7b(z zh9rAq;I1hH=ZUQV)%X^do2979$!x;0BlNJU#~r=B@N!%@#MR+SxO~J64Q&G0 zRxD{vDP$LA&l8@?b)5ZXS#^Gu!^5#s-?w1~R$UiKM{wu-+nRy56)@ZA;?HrQ>Rj-D zt@I8s{vLG;>uF1dX%W$=Q6+ff8$(J#Bo9C20FCb1MVJcsOfxnBE@0gLc$s9$r87{J zeMeUKUb|TBqs21F&sdoJwN>5Dw3*T^n&iI#GihRH$0GS0pXL*v_GsZW^69yYL+;`4 zQ{Do0ivUP!AIhUIN`%>PPm+aNfT;LT1w}EmS)Jc=af_rz_Jw0l^Rb#eMjYKGd zHE8#?Vf?7Qs8QMP-5AIo+IiSve83MN6&s-vQSQVMRUekI$_>@LM^R;ZYfi;(@(&5w z;gFu-d+J~fOS-XhRe8>z*@p@mS;cABbDk6myf|c;p(7`^W-gx=+^+NQBw2dunbPJ$!0eJV{VQCRO2gXQ(^ri`Lx z4v}v@9E_5A2T428hQw_CZd$mmI7}?>6|^I?f&0$Y@6PQP!y>7=oB*3xZUX*cnFiCH z+khm4)H76l!8zF5aY47dZtdEwXr=S();~P|`8B~q=o8cbB-#eV9;*>!S%-}%-oSY% z0g3<~^dhb626^GozdH@vx(}F|ZqS;4*?#uYRHtEeMzQ@sQS;zbkmLTdIiW!7wU6HV z{XwS`$G_0Ds`o4%b?&}3RZ#PC<51eqXV0xj7e(OYFnez`P+1jEmEk9zbg z$CllTlBoW=y)x^ll1ik2-B3hx=jy^pdjBD`oz!lst>xRXyQ^o=@Yrbi{Yayol>@hh zKK1()stPtn`8gQAWX>L)z9X}w^1c}hieBFbr91X)m9A*CiD7#-+iXwg<=;5sbHrLb zP{sgLQ7a+aRx!K>6-3u(75`L|7LC_VVJWNkK5EP{NxI4{KcPt%`}>w(nLCXqQev37 z{I7mZ+nP5Q*LnH5TST$Sn{=1w>K_}&WPEN`1hLcjS1(J~ z`*$U8%JAWgbAoBoO--L8&sr|4%AGMX3*`A=6nQSZU_^|YTPLJ@&A;I?n>V2ch-Oq= zl9_T?Fi#kVFJsKj@+8EVJLLkl5OHh_MF74r|Ie%5Oq8XYe;Fu7 zy-^%G|KAeN8u-k>Jcjto@E%>X5+pU#dEuYNNeKm4MDj=e9)m}%p$h6AezUSa%X&G^ zraZ2zS!vyLY{5#Xp4cuCJ_$(Pwq{2Gh{*Y&AU$O10kLTzKdBb>r{1>d7cA~+iUUvu zySdgg^Od@5JPNE6E+njP*0p~w5+4N2JVh1C-FA3x?b~3*YkOY!fry~bWHTyuYbAS|AMUXX>@)1!5HQ2v7JSE&N=@Y9P(lv3d{+CGYAAo&d{4U9~kfjm_yfy>& zqJB`-*P^^P{jXcSl6{!uqs_1@0m#| z4e1F!C|fQU@nMtgt43Cj7^^gSj`5OfVi$TR82y$;9Bh<9=w&{@H%)ROMT>*m#}`a2)@TjiU4Zbf 
zYpoY<)22s>XO-&GBKYBEL!fVYznSR2g=7EbLOCz#N2gC4s1=3THz%9__O;AmbhfJO zeU_7ZpGVVn=VvsL>bHfWOtWcZJG)QQt8skbDsX@AoU8qEipeh_I1Rx!rJyw8e)$j(-Ei;0+R_NGL@Rd!c6|DL%R~ccReL z<~6P5REewy7fwY_#j}Pe?dq(hq5WCM=H%$gGlHIw0Qa{eN)#T5h@Or|~9XpeHbirSusR4!q9G>ZoJ)*eisGZ^fj* z#TLH;jzaa%V%k$?{~derF8VAAxd~$X~pneC)o* z6T-O=as|Xv-!_sUY-k43;1K4Um4NXGiyzQ>l9q`RJvq#_!WN5J@+qsJPc%2BN@-Wi z1k)bBj#ND4Puj1Tr6BsQAtU3)x%yNI{;Q4r{72&NE_-yL>oMCYKhnm!z|?#{jN?;1 zed!e60lJX)iD5kF=Csg-G&uJ@_kE5Qnt>wfjne9kDhp?JV&(Ve7mqrP8x7r>yDB~j&ys4f?{6+kuurb3H$y>fsZX>?>^aPF8$O^+gnnn zw1DWKT4?38lCkvIabrEt?>h=sHt^rnowQil#uYEQHm?$l<+5GANg2-EVuRc6jvHTC zU(}lv#*@$bc4&6f#sb>A{5&v3S+Lz8=luf>(A)xH>te@$OE&S8k_*^<5RGe=#&nDQ zAD%)v$3nP$!<`kS5OK@Zy+@me6EE5$Qc^%c@pn@m%WRrS-}#y*a)2LYd5Ye#6$8@8 z=vRFC^{L%@ZbiAFPPS-CSA`X&g$737?3QV{BPHp;CZ4Or@>ze=CgLg3qHFc z1I<(uKO%26*J^HVCZ=+vB(~{#``jjh=BJ;M&-^1Xbv^gex&=aY~}0NknxqVkPRH5k+|U)Kg-mh;LFSsyw5=@s9Ka zl;=IKP7tr0b)bF0tB><1B+2B^S+{8uM}K3wKi-?JHTP~sZ(%aT0$Rt+oYbk_Q|%3BA;5YfsiTSW6BVeu62umxbwIh`K}+&dUtiPme}0% z69@^4AzwbZ@Rqm^3p!0c#h6KnXS0*0^ZqdH{ry~^>ck<`_}8ksyu4DF8j2 zQ(#49W)h(R5{2(aMs8^AWCW*!;WIf0qnvJmYsuT^N<3M@Bj>!IQ{gb!eN)x@`L|U% z?>k(&6hLh`&40dc(SpuZm3S)?7B=zA!O!TZvvTmOr^OxB;R9++4#&pgXwdf$J}7Ax{JrJ z*Y^h)=YDGOnd`~Bcu6&5_R09s!J4Goe;EJvCP;Eg!%8Y(8))4>_8q*Ye7_%_;m}A( z0eK_0y0y&P$7fVx7%ZW$Q|m0b0kqX^JwUb0Uua|i{DheS%3cC(LpAr*#dEXCOPV*`9QY9ICZ<2TbTrNLnP z$;ahUd#frQyVH9Iorx`^ab3VY$c3+N48oA03=bReEU}?eCmh zT{N!rY;MwD|H%QdY?Zn6RMwi>3YUb@>$SZ}-NSie{uZ+nM_mT#2lRHi3``B@C2tv> zQQLa(hV+e&yrsmQ6?Y_%G=ZsyYrMq5mqlyQQfVhtHqgOK@zO1>74fm;Gw-%h*8#de zQKz}s*<~=Hf>jdAxZ4zB2sQ zcBVJk;bLS@%M3Xf|csc6XU^uV~d}e)ivu-sD*u48JR%6jV`pzMFwt zZ?|86{8FPPoeyPw9Z*&UYu=1#+rgJ@m)T?$*s$R%!{>6&)91gQ7djz%`!AKX_R|hS z`i%Kcc3R!0>&~oA5h=BCRjg~t>T%sNn(J_;=TN1k)yugTSE|R4yDhPEY~7kES`^tC zDayvuF2<3>T-DWhbm+^|>xK#=OvCYP)pf%KE&4^PwOu@Evgx}{u#S#Kw{CZE4D-sR zZcGa;6Y>m-*cZ|c*Ubm-*^t=*PD(`>2gnNH3e}4d-_trKBPHdzl7Uz6FEZ|iq(OnO z8-%q7Oozzqz@cgt1&m3VTzNOzogM;)T}aD+BhA{s?;b_7s%t79)W1xbD5BTQF}*?e 
zabvpCm7MJ8yd0tI;HUR$^?IBeqz3*_IdYSZi_PuYdOxdh_7bPxdZeBtX&3plh=nuP zUHs@ERK~imZpVHv3IClF+_#&u44*t%VGPL^&uSjNY(WMe?x2PZ1BnAh7lyrgW_VKF zi_gvpYqwvJoqC}s^;Dtgc1^Ft&X2bOKLlKj_=$1VQ!%5wKH*H2>r1&>vUQdZhbRbJ zF!_B_?AzKsq|IPw(xD<$FG+LY!$iY9<+jh;yp5%pzD~bYxp|v?=Z4qEJ>UD7$PS+^ zZ|@FN=w0*R;@xN4x2t=6$36zW?)pMa5DPB3#$riSYE=K|LZxvkO_ZEA8T@CY_8x);vTYss{sGF;*krbEyUDs2^I zFOzhe+$`)Q^+w}rX9T_NfNx-=Wp)?4x53R|o1LO&CvH8u{wVN6$g7CMeAzF`3(Cv- zc-R__@szOqc%S<^E_KQ{$OqPpKUc34bs3pB!}sI@_t%ikcMfniDxCA*=VjV9uA5gn zuAS#!F*mCtsx|60qFE^^RME?+Fgx7o8X!-j|0roQSbpOI3E`6g|9cFcCA-N#PSKF-UB1sJ_-LczUL!#h%mU3A)+$%^AndU<*#!a8Fo&ss_TCNjWgkBB?NQOfx2qST35 z@fBa_c2`^~2>fY^X8e-fr9jP_DxFDm&RH8p?Hpmcu)$!1ose=GQzJXs-5U-VmA-m= zgV&&7P50yb*KGo74l)HzdeVJk+mm&*ZKJp3kc+Q=$sI0^afK{>EvFV4q4Mq>VF^=Z z!k;(vv^*2h-xOgB14TYrYAPfCT395BPKZqUq8>j|Ppeg+2omMvLK@>~2MF8`#?=8O z??EnUhqtgdQEx0Cj(DW;_*7+t^lyv;PN3}sCGN9z0gV2tkhOp5qHu%!@cap19-X6l zNyFYF(N!O8LP|sWt=mU4MIvi_)0EAcazD+TJfCSQx52I}`0h{%kFH_Y&_`aukpr5^ z)?ViXQ`GL<6EvSKjnkETwoCHUognrjCwXwgLWGRLylSfyusPeewm5r#QjX2IY~>Sm zg^h1dxH>lVoo0*LE#pRekxAKq=#G$1m~LVCO&>;9;u2P zyKpbv@WNEXWZv-y!abR)QWfp_p`}Zx>$x`CGOYB$K98TohV;+KhgaS%h5>E&Nh+eb{pU$^o5%&p`Jnk$>0lW^r;zRYYOMKf=9%3;7?u)pf(KXUcw zoMti%D|o8ra_vK<6LuCGmy1=o)Mk%1`|TfeQORpoP_vg|${kjkNv&m9l8yaYwZ?_n zg~z4XpCjy2dzMf}A@8^1qdoiNxda6@PjSBr=y_f6y0BzGC7Wdj@8OPt5RR7V;oD`a z!z>1JA0pfNBvP#{EH>gxw(HwB_C-uQ?HFp}sgOq2Xiy(}a_B?UAW1P-9AGl4^L!X0 z?leb47=GKp2&G~aR2Rl4>lwJ~x2i3?KNRl8NG;$7N(0G4FkqXMwA5pY2o*2c*-3+3 z0WUS86H1`0{8h!k?p4^jwW*SycyBokcmwfGFPc8fN7&qkm1o)nb4L-f$f^E}P!+WyAtjmuvd%k`7U}!3_#jvDUDIMC+{5Xtl7SKC`?M zZ(WywxuKiNqAb`%7;lQ4Gh$Qyr8sUpmvfN6hXbZ&TrKHg+uRS7`@87r^IV_m_c=Xy zR7S(Y#h_burC|r3SW!gbGheUOt9>s1G3J$+s^|A8PLypm&3+%P%ljh#y69@BzGax! zj^2Cc#l7!Niu1J0q>Lnf&=z%UJ+!gKF-OkfT+sB`h($_PmC1_UKXxv6VD#l14Dnhy zsQdPy_}sIxq^mjWx2+W7^kin5I~eYj=q3AeGm9941y9L`}sF|4Y6u9(Q3#>Jvq zUj_2$IT=d@qiD}CWWXsVbL@d@`45TePQ|R!5|wJwX)4>U96eQXn}c;UFNQ~<+)>7! 
z8~AzcEpHTtB}2KThJ`m6*D){~GxdkW(+4W>WJtQDJFOeKu}WV=C+{^@n;;qeHGibg zE_}JA(c0SD2g-bfitzC8(TCCV|1J)CNxXLYvE^M6h45NxcA0Yugbfs@{<9{Vgnbsx zI>d?{JBcrqo>PiRNJPR9I>s@I?oXj4oahXqFPv-|R;zzv|ofH=CYT$!Z$lmQe;3 z`ah%pdV72Jaj&SP>@DRcL8rsxz{Yvp!)c_aJn*FXNOp*IY1txYw|l(k_v`-YJ9F8q zmd4^v&!s(Y=$x(f|f1d?rlbSq!Qgn7$5g7zz7H={-sYV?|_LHaG1V|y3!3U9YeR&vy^Kfd6{jkRX*msOL8XQmc_T+i^;iEJ=IpjJSn?x zOnauP-N(cRo4QLm2E(pJljVpvRZF{j-UZW_^g=rFT)u*%-2kyTO!67` znA75Pcb&V4kD-_J%c3go*U|~5FI;mO%G{>8C9K*F7T*|?66!Sx@ndl)#fz4Wd-#g$ zx|}dpxFVPT8r>fn{ZIn&mVfbkz&kcv>a06U>85R~BINzA&Q_I46878a zeE7NrR^9DRZQY^D%;ethhXrs;uX+UufJOwDxzG;QNwUlx0a*ru$k8Aw?)BB}9HlZX zPFeAVru=0M(mLHeS|+UfTbz6~iuu1M>9shZ_Wd17`UTOZG*@G;;i`czIIeEvc_9an z?nVAnOPd1BG>=0F-xfQ_9y^FIoaiXw^6eThpIF_v3ryretX{x&-6p{b~Y#AE{+5!MS{WKUdTn#3Q(f`-Ox z(01MaX`OqRlgw7;v)8NknJwo$-OI09#nL?QsP%IA@Uy0Im`jhdzPz_)+xE{Cd zsv0#(kk7Qxr5PPi|6*ihho&$RZU7i}U#co_%&-8ldP7-nei(-PK<7OK$W&zNmKMB8 z8^SR$BQw*FX!#}ECV|4Qfy7zvvaSNS|0j4P9N2-doCzX((dWH#pd0h!(7%yx9_iNxZK=^OUsnD2EZqWsk{`^bEZ>9z5q z?`B<1!8-%~1&~y-A|BN>9^W;q+!DZ{FS(d9VAHFg$t7C8{HI2;B)6ti?RXjPs@9xp z+MOhN=RF&f<{FRZmA{ejN$kvQmJt0P1Eo7hk@2Dwqwn(J6{LH?=vAO3) zu>~cDwsZ$yTrPQdk%{M-bC}$@%d;UB`aRo@*)LO+^VFw(qVFifMlze$)?}gOu{Yu% zDog{<;xGK3lhSne*}o6Z1KXP)q|E70-yAqEu-8W6fQ)!q4ezmZsAm>zVp&6@FmH!j zoD!(Mq~*-qh22x3(WaGO`L{}L8$J;6x2lbKQ2jzo1QJoJ1UIXz^RHgZ(OvD$+#Bw2 zwKpruMmBrNDOa{HX8_2kAUHXvP812&zvMP0wK>Ru<2n8@<={o8tv-6X{(hB%;hv6j zGxKly9xtAB*0K+hMiYA($&&Wi$sE)8_22yX$A6QHHkr2xxWr06l1JyzP02RN=N?FA zcjm4y=HgI};<&u7cD-VowbDkf2n!r=Mh zFEFaeoCvnq$>Dab1gf_hNLD8Z{xr4k?T=egxB@C(XcIxMsa2o_X9#jX)&FYv3L(wq z&OzX#@bqoyXao!rkYCVr16ua~!P*tkEVhFE6@g4>g$ch%rs|?YBz3IF+NjWR(u^NU zoE}1K5TIwxB4KQ)w7k?E!l0uJ6MFy%1cK%-YKJ3(R`eex89u5>++1c#hp~11tN1>N z_ij&MB0B#iN&n@-$RJNdvX;4*pk^{6z61WZ3qZoJfd4X=J4@~$TuTr+$o?d8>pmC9 z?h0SYxtv+gXgTvMSaYz#vK$}S(>gxv5tJ-4X4V|#YQOY69$32Xi|BhOJGwW$_Ni_n z<|%dbSC`gmQ*K0*z0uMXJtjZGuqdoc|GFmhX*)|6wfC#=lQu}+TX#U?xiG?B;m266el!+nSQVR7x@ z?YHz^@hE7HWl{OOQWIbjBlKCF9wxG*t!>T#wuCw9O`EdMk{{n 
zaWya%1B6#`)$sM^(`N)&m=4BXSgId?hzn@7GOubJyrJXV#X35E{3yw&{-j0=7_7;F zxtv6lfScx zFemGCEkYIqBKOb;4vxyp$6qLd44`54+k0Sq57}xo8!PU2D;Qc@ot3c%O_oFZoEMUE zz;Z#odNmKi6dVt^uRILG$w=jC0i3f?z2b7C_jc{&W^;qkr(0()5N1d1{@kmoImDuM zBdfBPd-=vP=>co2_u*9WW){Z%kvxHC6dylcQg*+3lnzUO^~2k_T9Tgro+gW@T#k3| z4LIT>S&f!12UK;%+hfC}-IU}qCDzdtE#Fdd7WQ>e!j(?uOsTH$?o&*Rm_6 z>a(fTLXmAVBS~`U%ecD~`RtXsPVa0N)Ju!Vx(#`a#d6quc<-x-v8w0J2yal3sba#TYNEV)tiFg(5Tm@X0+JfwBM% zaCwge9+S~sEj3yio{FV%p8QkuwK1bt2I?HOROqrUMA0zK5>ol5GRlT+k@AvPkov(0H z=}jNOMt3;-Dr}5mW8`VuIb}iY?Zr<-QWb6s>OzsOs%M%W?NfiXgITs(;lPsSl3Zg#+^ccUNHh7Z!NMXvvW<;I z4Nj7ghNE;XXjeih7n`nKS9eQkgI}7ie>AbD?;A&zf|J;KzP*ReizsUizign%P^UUi z|L+*f6?E@S+VU<>i4ScPZND!UD+WqnZTG{+Cr&#`#sk(J;?yw9YPTZ)^CU8z;118NL)Pg2QpVrrtHUazXB#2Kxr#bojB0i1m zS|&+${|?>GtDGZ`Phq}*s>DhA!DGLOYI+wvNe~g0Smh~0d^5KNw9RwaGalvV=S%sY zzS38pIs5Y6ZwR{ce@1GT7=o{Siy6cUgri+_3ZT3{!I2?Ox92FDrKD5rB|({X@GWS# zmwCqQY)XN&Vdr48q<{*Rpb71bk}5#w6B`4)9c&`;mDReZn+Ci%gR;jT1(d}kt?^tnst&tpT(FP)Q3 ztiE)4hr*p=PqvV^8{a2LB-H*9cJR;r6BN9}Y_kykJsp?NhxYTnq>40U={dGP2y`jc zsiGJsD1T)X4B*(7UcZhXNZirBwH*Ya=(4zI!g7%UvGnC}ubB0UO8M(~Vw{c*wPdma z7Hc}qzTGIkHASw)%PjqaW1KkWfPRyQ#^Y!CIiGmg8K$sN_XX(vud3zYGlyoz800mA zC#l?SGwOQ=ER73qFnS+92Y0m|d7_I5g6OB?t8cg@C2v_bFHVEy-o=QQf%ysfSxWse z=eKM)c67QzAYTH}VktmMZbGRLR;6iIT>oa}|Y}HZ-JG zi1n~pO@%U&fT3puS@hJy{s&XEy&0n;Z0zcSAIdDI9J&bt$q@nS;9jEuI(x|6ZFY=3 zf6ww&1lD6IyKJxmKrp}DTN{qEZBaAP5D2*SiBM5KXbI|@VQzpBZGMM+4M3bar<|Sr zf%zl~uOaP4f#c`G=Tn_K#DaF)e)oTe-Kp;*j6bMXTe1D_az1j53zU-xB`+6_B$?%C_q9<=^0 zL%C(AIJWr0Kl#?<*^=@=yL}pL;=ehX4{wOD7`U)N=a@HU>BFL^w5T^(S7YgWkphX0 zuq+@$D+ZDQcp0#oM`AK)jBmJ<9oxW^;vE>VfNM7G=zRwxQtL0*0DdP`oCFX!KffJ& z>;@|~T6C3wgffF6NNL!E&*qOgM@9fr_5*0=2}KL=;j0BAL_BCasv^%SgvzqHp09xA zwd6Wv_J}oR;jLJksGKB0^{vKYy5XnJ<;YvpIqw*JS~%lw3e3(WE^fpOzb-oR%qtp0SdA~@%>9}w%C z?>e~eFn-LLlQ)CoV``DgUH%@=qvZHb)MhDKo9gMO|FZx3J6Ccm`IADEbEq!RwD|a_fw5L4t>;LbE%#NZ;jm1PfY4e z@Y$NwULitln(r9{Coe6jVr(2GSCdDIDT+`x2TpB;Rxdf3RK(mY8I!3ZyOx#nMojEO z#FQN;3-rnL`1msuYVlPq4k6s>qA{D?%3HoNVqCiJTWpAS@|f#WBgNX+58TS~#VU;M 
zcdYAU%9UGY6go|vdfVc|e?xOT&hl~3Q6oLy!Lzv+c|-O37Q=`8qoZY_hnjFc%YzG> zSog$5Xb1FV_EzFz5=eLTLI zcWmdeZ~#pNIPb?`hzUSo$^r{p1oS%PJK)sp!-LnP(zLy6e z=@=08LCEP6^_^zR@~-;1;j;Akr&+4T&3zcM_yF!9c-An*m@u#$hZ_n|n}=XM=RY?!7>g`dL1Re!q6+OB z-n|uXPJUphM&d&d)0hP;ke~Y0g4jJ_(Bi5A6V6G9S-;pvVYoQ07sID_ImekPEf@gC zJM38j^F4Gcw<_DKT->{*MrP|$n5f|5S990d7j#$4+MC)HxY5}**_w71KC?If<{C4N z!ya!@S(PohIM6%d49TH;3`$50!_Tg(5703(95WNF6vExf_hkMO z@%Jqf&^UM9t~65qXyKF5eNA_?+z4KoXP2~TC%rt;>7q zN`r=YR->3pj&l<=kG36@>L!4~R1C5ry}D`R#%d^jrRnq$SqnckP=h3>HW23kr^*My zxIlGjw(5w1XYo6zqt$@wp^2b5u(67F?A{#(B4@B4lv#a!3~skk$_biAKp^ZbP=qO} zBH%CTk&f8C*!5dk=<8ifms~7#% z?_3>)VJ-tnA+FE=e6f;>*Cg+C&?a?uK8&&5*N?LYPRCSE+eI(KkM5y^nL_yZc4xkx zSg*lM#@5Q=H^%hz^zuk|Stozj9Z1BH{JJ2eFzeVHxJ0H}Q~Q#em`IR4WtmeFQ36;G!iE#f`Hlg>%Dti*e8P zi7sUo*AhwuhmlHT!aQCT`tJ%6QhVgt$w}Jo7cN|Y!F2TYefxq3X2!Aw0u+)z$49(> z?mAx|Bjz3bw<_{ z`cRTlr4n8<>-IK2gBvx3i+ksL)QdD9rBmaBGcV54Q`4y4OLbZKdQz%79K9fZopX%J z59*^)`!7CxbJb|}`gX^&BBRU(dg<1(sY;dC7E~GHuV32IGMeI0YGYPW=l13-o-*f0 z-sSw-vU&WoY2BEH+T6V8l<=Xrx9^3NlBnJX&WLg5T+o+i|MuFxqbF7)KkaqAieAi< z1V#1>skh3Frj$^vadY z%E6+z#yRQ!RPClSbUi$8$$WkO#QumXf}U1Nlw4BQeOLAT_V`=zXVmibUPk|25hS3z zW!T7oLuZFEwh2i%`4|90860{D(H$jFP!8S=hw+Fp$W=f@q^eFEfc`XD!6R<|B)}sL zI3QjlQ$BbGbYd=nLb@zaIHcTO?FFJ{Ztw>m2tPIrBpVE|w?6u%f(%e;P}P~A)1aWH zz3rv@WR_X3mia_rq!$=P_y;4$o7F=ZX^r`5Kri~t?EGA;!@Dsft-RdzXLd*JYgvrK zRmIgs`^p$9PnFwgltUfMx_Ajp#+s`^P}G| zE>c|F+}Dtc-RNjiF+{U^K)8-`VQiFP>^n%wZBdXTwG3Bx^+JAT@#mkTk0}|~i!Q_- zyHSa$!%>FfIemn*wsjxB-f9(SV{7){+ijjGa|ye;yOe0{d0bb)8J~@@phuVem!(qn zf!DDgkK1rO(Ncm}PE|ZnOjLPeLAT=+3w5w1@0w}~Rqp2VT3E2z)?%@ylh@_h`|2XS zfcAfjV;cFf0jaoLK@PMOb7haH^MXXdU~@|G64tAJ_P-H)4a~$eVibCf`%hS+PJ5t5#$Y1s~Gw!GaDPE4GYA+<_7o3|f(A7`2cY@LWsY~1AS z#>{C>)!*b{vIq>&dJiuz<=0%wW)d@PfzlkRJxcOlO9~y06c3ibbIB&$A1K=Di!(i; zUl|y2HT9^`{R-bgv27Ta@&L=ETMDeIcXI4BU7 zWLBv-;AV@0qR^}SME&;Q=dXVWunN$ek@j)=e->kN=N+`KnyBSTN;)qW6Wkv#D%gq- z3*~pjJbG5MDWSSq!GoAXpzr93`k#kc@^lwlC!cY>x}>lA5`}?#XPky?Hf%%3#y>Hc zeFyzTKE)7VTg4z!R94<)p`E>@TEX?A?%Uh9hwT#862--RsbQe!@EQKpUlMN|{C_RP 
z_Vi&3dMX2BAKLJpW;}Hj1pQ8fk+MAdUO}s<+#{QbCg2k{l_3X3KA%OGg#2$V<8kvR zV9&i1b7^_LefdlF@0eBvn9A{iBOq+A$+}NyT7F^mpm#p7$V>#HUVbbwcb&O3c%_0f z>e+db{;>WjOhym5Af!wa)IbQPsST`WMi@XFyy>%_a>CZ+DTi^=HIctTe#-?93V+X8 zofq!P+Z{Gec?xXN;A++{PbxAB>6FYjxO_eW;;&0s^IpD!ImZzYZ0m*inrOTd`)qXG z-XH@Y1y9g5Z>N-Svr`N90Wk^-qkdWuT@1pgkE1~n8uc}Dl%C(F!{LJ-vx;)NWDBtp~NVEm^YA^!Cb&&3d)g+NwY2_YcmTkcMjoFDi3y{R$eqPDc!LUgfLT=POj`$lt_ zPQ9mAGr9d&LhI_{Kew;pu;Kc~T{pzun7%R@(^3Z&1?7Fs`f<-QA^Ek-mluLL9OTXi zCoHJ{xog>mH|QBQ)@Ip3Sy$v}=sWs{eF~qCje{jlLB$Ua%EUKgRueF`l#`O$>{@v9 z#O|k?S)nh&pC6Pf2?{7IOOAW|v|+(r1mcg)4D-S9r}ydTjetST5UhP-H$^CpEgzAj zj>r(|!S2GybJcO@)F%j34KShrE=0K%GyRq=qKciFQ>P#>%w1-$iKniq5Xrmi>nvq1gNEQG&cJY`zlj&A~S z=7Jk%x3^l(4BWykg{KTHFF6egCY4+9ei|Jwo+P(6DfHG|^-nc?f$F1H=6?vL55NqW{bCV*9co z&=UNSD8t!C&|k_0g54b0Htt|%zLVt%{vMw{W7r(Q(~a!q=-ixee*EuN)7IIW`+~eY zlJdn?J{SEtvI8gCSW_^v)Tg?9rS9%l%LBWjo2i4iCaUxRAINGAKZnNUpG=3N7K*zk z+sMOGM@+iMs=~cp{!%<9clp^xrdW+xYx^8>XP<0}JH>F$GtX$&$EkPZNso)-zc1mUTTT@S&~Dw1=Ynwo3NuonqSpO-9;T$vrEQqJ6=3Q1u-ah=~nI zZwaWU%~mWaBHZASfSj{gijfl|G`?b>EylmFQc%)fhQZhpJE@+WbKk88Me>?pX-a5$ z4wdGd%FrK<&NUj>X2Q)xyD|id+{|iveZ-j6=kpCYXb#H1tUhd?_}rqj#EH*WM3U)V(7{`#2Xb>y4md0qbH1UNZ<-_W-g#qT{@JBVgM6rM5*u32sn&i zGe2j(B1sm9TUPc$N+`YM@NmKTINa=qQu@QYE#4 zWTZ^W3is*rjY`<$#&Pthjd!x{mAp}?s$GPB2z>q14#^k*Wt19dgxoQ+6@nJqDmy77 z50s7#Kp+;i=6cUsgCzp=1}*|qb}Be?XTRPgsD|_*MOAv9oSRvm8d*s!s~2(gFSdh+ z+HO*snv{WHI+Y_EEoMDa`LvRE;$rr`iBZRW{gYw|u{MJg`?xY~qiu~Ba2Naf((b26 zOArr?O-nDJm`Pb{y~5>M<79RuBdI#g&6>d9r9&2twEPN7SLCG z2G!@_)|}KRj;m@0!Janz{`jByv*UCEyP{ih2dlmJ&oqln?R!g%sL$FL~;u05hR!)i+VnLr}jmnQ9u3g=XIOqnW z&2T5TdPzhE?Skl7Knj(#&N}XG5J;-}6SbAtymVo!hWh*!)+P)Jb)fk9LUT#q*_8Hz zAAiXBp3W$)IXppm-=wUzk94U&J@d?&!5PVUPcu^Sr@?X-r>DHyE%$I!RI-zK=m%;L z3*Jkd9L!@e-HuN7OQMP2Y!igKrq1tgj`iMML-A)IL0WWpbAaf}3*^X6`(KxIC=!C5 zX1;FN!Xl^udNMFFfHV{d7(rewz7p;L>J1~4BgP5|60W!ql$@~`8Sp0#c< z%~8X{$)j>% z;3tA~Ahx_9woUZVX!5DA&}xxfW5|R0XTcCZ?Ue=ljSqrrbG`-$pf!M}{HjQU?Gt59 zyUUT|e0W@cV>7*vrsJgI>a#m!I`AdQ`Y5o;rbX-SA{P}H-YJ4L*)@W& 
zVOA;V=#Xy-!+q>)Nl#cK!LNKQ<(+aHK~T`#S83YM5SqO;Y28MmQ-6|}Ta;D|t47ve z;eR8O^&9HZF|Vz6Xs)j5^qsATy&aV*eXW@(c{AVWS5IkA5!vCxCNdUajlE~jfC6>y zY)=5{jY$_LLDQc+ygZyUer5SWoN(8@|Z^&XxzXiaeg z+TlCIuoD`^NX%b!2#Q*#oY%+~9Z4Utl)Xen@mldrd*f5S%kxwlT+mFaX5ds-FFZVX zG<``Anvj8nUs$p96|dOUb0`4yU)q;0#$_+qpRCMDTqY28H+SScy-`~hY2}X4ZP?Hc zlcg!)Q>SX-1d#AH!(cOX8sGzdIx=2Ay130h=dQn=sp?JCYF+8=s{fR=om*uN`_+R_ z!X%ije+NaK)nB{zDl7q(#1wcoU{%;VeSEadA1*R(ogElGy#=L_KB#RJp>qNQ1o-zT z;DVvX_0NZk5Qt~N6-UVeN{-ck>ed2aj`pKE7eM+_0ft|b`OAwkx%lZb8nl1+rYQ(O zWQ1MxT14lY$&;@O3eJIWPYFS*94wXMk*yMRJ5T+~MuM^R4=cpNE`kEBOF!7C^QK%v z8qVkh&|%K<&nE+p7Ih%8?NHo6%+ju+Z4U(`1o8tJNM-1JOn{pegkQi6B<%|g3E8o4 zpIH@)A=q_+r|=ywcyNCVpjO@b{s$3@VVnJI#bVr^mkVxAF=qLT zO29FOkPB4@w1%N6_VXd3a6BV~lIxs^W`p1Y2eX`!_2^psA0C2|Fi@fl0iEQd5Jp#) z!?~UbKy1ar{jf(;opAB}Of8iDGL4oChm182$ypG!k&6L14~4*}$efq&=QF`Xx~+eG zo!Zh8TRw;~ngS*eg*ghI70@=|i-DiaNP=f!Ely!1)Wke?{L!e+7ovh?( z9-}lgX*ORoH0*FtPCGHzPnc)?$*5YT+y(?BHPd2Z<;f+c}s@$U^UqamNmwRY{H2 z6FlX*7&+BHoNG+FySx*MOtwXpX_vz^OS^MrGqbaPsvxz!`m;Z-(y7J^P`S;O5N;00X9bs_?w1n(I?{yLu52|1oIVeL)Z^+m!gj6(F%6>}js&8y|4Oby?`peMoT*%h?1HXg zslX;-s~t}_u6|nO+J3#27uTnt7AmgVxxR3LTJy2pv zHJ#-2roMA$t*rxzcbI_JNfV)$pVd#y-T`YLHpZV07bZjXG=iQmpDaDW|2P!zu%Fus z!iAZKb&o(E#NwK}2;aSO$gMdy2}^<6r2-gN=7Jf}-qQx&D~Xc12?)!{LQ?=Bl>$IV z`DFDHNxZY9YA0RLPKboj9jcd*frD=8i=tms%Yu|YbhciPUSMYobWBJG#fd*yvv*oB*g7#vcYrEYR2L5 zr=t8_@CJMScT0Dxw#maB^RE}sEE4V2v`cygj73X|v&y%Q+}?gz>N-qz8%V~=u%=+z zzf9?3+P3YMsAf6Tu2x`$F>iTrykl}rB%gM1{2DsbZEE@mPxuBsPD14Exx_7(Qq|H= ztP1#IeBd_>rLgAH7=^j@;J)6H(nW5s(8#TQQlJ0CGh=bpUyIN3UJJzDb|%VGJp~;o z&3F42?#_?1WY6FP?M2u-zm&T6>;nu;>!Z-l!{UYi6y%%NV)nVbyqs`aO>Jfk79P+r zx)#811|rT*^Mm}3AcKyEiys1PdBEfEKFbhl)kdL~cfSh6Q^tSWNY9C#wVry=IMDVs zf13Eu8s*q=w0v_AT;Hwb^re7ROAF3!x;6< zWR_G*)K?+xd$QwL2+SIw7k1y(brK?2f48qz+C_)e)w%kYhZE(5s>vtodS20dwA{n? 
z{raFkefEr&ouMB$2LV9Z%QBwB03#MI2|(TxPQ#F7C9$=21;GcXoTbqmx2H15IIDDa zZjR&h>3V|rrAuGDnrfkI86;pS2D2^&KnDL>FJR=1s7WmTyGYk1(-TnQR_g`KHgQ_b z!X_Yo&)w4#wOPz>R4h_L+2Q4mxF-r1{{KkO;2qkj*s8P8#P_ko;V z{YQ`Od`=6tD;FVyms33w^tJk6ZVqpKn^vPKL4LJq)20{TSlBB#1*vqDOt1(lA%D$B z;2cJ*;6cu>A>W3t53TLzKNUs%mauzA!7zU%>Ct%wg;_qp1|kGu>M7`{dQaH*(xppp zR4st!Wdvq~$N^R8%`Xikc;|CH-wd?ve!8~*Y148E7=S`zWiQKU24mhxFgy4Te3V9n zILPuu3_9noJ<(t^(g{5)gz$#$oL?tZgYS0Vq#_;mG4pUi(&T16@*ILE-CV+??;6525t!5rXXn zn0=nnC`Kl=K(mVhe}st6FUQkcLx$S2VD+Z5-n0qY)Zs}wev zCw%4}hkD?DSBkVA+OUUtDn8WLV<8f9k?p&_d?`P*x8OZ&*ND z07uEWi8`4{m}&cYYD3YjmhY4c_tB=UgB7%c;9q6_`D=3J@|gpGfh2C{eE)}h zCs?Kv?-Uzh1(MQPgJv>v#TYOhEVL5UJO~BYV&&Cy>R1b~86F@&84jJ1elYL8j=J8k z!2tT}NPom5G^Z|00_)-%P(wArVlox>6nf=r+*6dGf104LKHnO)_(9fqXph!&&Av?Q=13?;k57viNUHNYJ+K2 zDocqa_@6U!sn8HitH1clUj^f5$tslg_V&;4|K?av&j4n+HTzQ+-n9>XhAicwLVLmL zG;^y6W>nnh)wW)+%lZ3hH;#jG=6#fnfjD5T6$>8fwR+gOp=(!W#msplEe@hmG}u)9 zTL%eayAlJ{soyh7Yev`d>VFnuZ3$p1z*Y9x{N06_(UmNYFzlnODGT;*Lzz@U|9YR- zoXV9>6QiRwfZQ0IC8>OkL`+$N)12YIW!0GB9*9stltD>z!Q@#%(;vamX!Tm3+lyRl z&~slUGA{-7MHFpEahXT`F_e$MKJxNhT460wr@;l@X^!7qcHK=)T9IKo)(Q9FKLmu@zxWH$Z&G4l+(c9aIt|3baAXWe z)P{mWvPPPqWbQTTL@hlMtbqKEX^-ZQ3s41-@s{G2n7Faa?^nvIqZ!3jRaL8NZph8@ zKZNNWY9NqKaGX+=N5xS=#U$`$qCmw1J=6@W!UnJh9+U5Bzg|;PA~8RzhBO&b8o;R( z2Ui#l3FrJpVJ)xIgM*_7dxZJzKjdO-V)8!>Ysf$bJn7tHMk^ObcAyC$$O4d@7VXi3 zF?ZgoY$stEJSW}BI{U?{^+WwLUaRsJlH#|)WY8smpzjj-1oD@Za%PirfJb-fVy0uI zNx`BI{qdaAiOhCwM?HR+5jw&Y|5GV%A@~VzsH))R~LnUeLA!npN?+ zGI$;=ek#F6MyID`PFO+%Oajrnp`)}1Y6f=e6E0wout^R4r?JzQ8XtxNI(#Yz7K9o| zXJf^C>L4G3$;VZw_D{0th#|Ki=w_m#U(kgL?+Z)z^9m9@SNbgZaqvf;fN*=+8B&YghyP>eDJ{bvYC3+?RO2Yjz{)W_uH6h zM7Pd9K5BOF8-O7{;1BNDg+K3hu?#Sk*H~H-4sJ+`zNBITkRG=;DP+bZb#~KCdDVqv zf8uzWnP%MmqW7;j-+K3+HEW;3mHA_Re}D4oOwg+q60aIA*4>}uIc`7{Vyk>+By`;% zgsDcnaCCw=)ERL<1;lkDptv*uUL9a@RL?($ilrL-8>BZ7)pBf;(7J_+waltj{=>s9 zeBgOE-&zXY8!7%w=(QopEeC`}Bv%3{&KKdAOR&vG6cn7heED*-))Dn4khp>YwB&KM z)DtWjWt$>mWJA{vAFnF_?%seUYvZtGMrGD)e_22-c*;bBB|7+Esq@>8+?*$oYX|a_ 
z{6|}MOfKr?TXgf>nQkrJ7c86*SbUICUHFoA-rScVFp5xyv3NRFf}4wLp#A1G&IfgK zZMRtPc&2`pP#E<@j9r6h7fiEnAHK=b5(pR}nrOlt8EF}A&u-Bmu;?QopA(9(@lT=2}GzcPsXq}?2 z+2>3v5BqRg#zvcWb%>B+?oQh0!g=Al=NZgyv$#_~`ys&KW$Eh@M~!)ow{Iqx=*i7f z`#5a!1|rH@8@Zk~#t7UPwCjC*x&E!mWjur7K!7D9e*Z~_^hsWuqt0Jjr-SpfRHzNA;VtDdO2i{{@CJxpK?z8JR{+3Gup|ul@SI z87q4HW%A`4VY=KY*CLgoweW6JS6QT?Oh#4a)RlfGd9N!T#RkP2m?vV!I|*^Bp*?C@ zU1QVM=1DgPACq-Cpj{am?4%a#IOP}pH;aJ9M^2*Fjp^<#&cG@KUl!r?*>A%q<F}5W)L9R)kzF>f6L%vFMck zkVH0E^p}pfdfKK(oi8W$U9`6>K=H$1k@|5wCtZsI%vr?!gP?$)R_tRxJrs}p7+}3^ z-?Jwgy0d;9HBhg_@l$L+C;&yI2Fy6YF0NF;0~*9H25`*$dV@AT6H8Tq2GhU|vbY$- zL@^A8UA@qPD{Ta3@iS8XThjC^(!RS;OQ8@oeQWTmZd;^1yKxo27VpaL)pW~*L+AJd z%~XSJ?`ZX+>34YA(1e0RDbDu8vw(Ou)^Z4UNN^56gotXoJTu7`?waY*_&+WIA2wY{ z61(?1U5+Wz3k3DiDQ(c~=mxCshKIgKW#4m(LGg%XUTnkKsocXh_LzAR#wYH2kb0ko zQn5H>kaGW!4d7iJwgm7X`)TUgWDK_ZL9})wOSf;`L z+k3Q7i$Vg(C;$k`$oSY{gEl2N%zeD_^PTVe0r2`FDqSGcDnYGwS=aSj2J9e4r`k^% z;fFlp9<%^W7q1DL9mlouS_ucodz(`tp(xiqvhUzH2P8eYnTaGrMwidM9!Ch(&~O~g z^m)_{_-{NzK`sok{wv(&wWg|zLzFDJ%yC9p%I`&hZg-y+JIXzEf0O$weimwwV49XV z!yS2#L^1?(`jh%3&fA$+TP-4!XLR{As3z?>Ds)fWjwzqp4*@-Iu>Tbwo||sxF%!XK z>Bjo0eM(#T6c6-&afYc;goO+ma>9*}VxLC=`~L`g2XXIyunh*_P^{FE$+!xwWf;mr z*%brt)8yb408tM-g8P9egi3HUdmfjvXcS0AxS-!eAVa-tJAO_KK*Dc+U$>5tkx@6e z^IreI;WZWcjOsgbb@%Rub{(u${bdpN{y6FAgdD#-=&r!5>v1%u00;e*G_Ba<6+_11 zd-A1a+qI@Rm6zUIR|PGMmX4R%3Ck50T|AAT@S5#;jtKFI_v`}e@*U6~<7ghHyFoJ2 zQI(`JSIBx3PO_zh zc6(s#7y8kuoc}7!6ZJGCjlc>WwSj>t{~dzV>8i74VAQquzZ>nZwAv<3@{)L#xU!e+ zJ_OwQHQd{`&(bMhzVlSc)~zz)s!kBoALf<~5S7c7xOf|inqvOmPfjet4*NPfj~?Cf zhVRnl@g0qQBD~W4oziO40rpZ?g~fzKI^Mzze?Z`rQi@mw$*Qz_c(CJkA(@d>u7EMu z2K-?LnjpxtClfvF-|zKcN+LS;ppfqo*For(%~xN*>g~NIh?{$*lyJka%2+8jexNzI z*;4uPrOO8G_poKVyk&gKB9|QNzjf0O8t+Vhm84>L*SP22M@6ZecpAZ;kFVYR=A{@bKExhO9Z zQwiop=aT0|=YNL8!^kKS*5HvF;D5r`xOBgfBHxI$p;&9}#S{ps7>DRr=GZ%Wlc+bo z1r^hxRzaRQnq#tAK6xV9+SRQV7Z5*`H*YF*R0llkfc%8B%waq#2Qw5VZnj+Lt`A`T zHZJrKdPrlIo>R-#H1N`e#~+RQ!QdeVnQd`nTEB!B)xz^Woi##he3VEio$G^IX27^l 
zX~qgBsVn^#3{>*|I5SOg!e2Z-9QFa_p_jg%U@TGm%-Yym1I^<5WA5bO*z4yWp?0fp zURuZRx-1ZdWyij?Lky<1_c$-D@Kr%`|3h(myJ+EO)EEGLe#q7^ID-0{Qvt@E16|rWn!n`H^KU@NId^o5Cx0OXMu7g@wPX;ge+O(G zvL)xX_Bs9y9ZPW)ga7|cpSj!4ihY;pa|E|ae~{h#?RI@&G4LIsD)6lTJ z-Dy4^dBa0a7+`AAI8TPM{EKBjjyol=|Lm`^7U2}kj8I7}Xzs&kt4pUdN;x$N-u7Xr z)cBHk3$k<-(I#b6xu#@YNol`RMGBxb2N;Bwgof98)1=y`?GfKSHxPWBrvf3=-nqdi zp8kp}=39}Z!Dr=tExCY+G6G)H6mzMYo7*t}*D55p)2Bg$LxG|TOuVV#awfL`wZ8gj z?S!Bbu$@5^yL}h4d5ujjfuzeT)PfpifIeCc6b=J#Rn1;_d$U2i4DZxY@Y}p#8oz3m zRWukM=W?g^07FtO(;~dDA=akN(!0?(hLO?pnxt)#^KxSPy^ZCWXdS;B(ryPgOh+p|i*4#x zZ8}e*CPDmFtPE7n9}YgTe2|WDny9U;T)~=M`Z+Aiq>KY^O3YLVX=15u%->2LKM?-A zLXuM=*8Q~2f?iSaVaf$N@h7#wU<$yx*XOqgsh#fUk!RQBcb;remiB1{=!$`syIB)4 zNG;veB!AkX2*9;`0Ire0B0ZE4dVZgkZnO*RA_Ka$t4rRu9$F6{k z(c6PhM9VX0RMQ6Ms8LI*c~yj6T!b5ndq)ZLFTR&(^f^qA|7+m&*&%n}5>r|RbuoFE zhc*1Sl!s9Ug!gJ~M@}B1ut?G3Zl76|*VeN~Vbq5Ad@kF3*nmF3EbhMalu2TN?A9mCpu8a|_v;f=s)1gn6yU%VLs2uLGWvsF`8WBhi)> zJsHqwSWGn8^jYg1$vAAu>056~WOEZ&j6xi-gRN=U##klxSz|(`!&2R;|B!3fKyo(7 zw>cvXT8J&3R{@#Qh+HS>FvO%rpPk4`)&s2W1ag;`=AjzTcmnmL5r`oJ?*X!h9c}V1 zv0HYZU~66shCDjEk1T{~QWsRV0b?8;1d{(m)~Y5yDaxW&whT$G!`Qk5mak>{on|R{ z9NC0pTK_7$AOp|tHjUK9WIZiOH3 zjl4Xf$-+&KC)saXOgR%|#ASe=PR#8AX!D*z>^X8Wu}Os~qRpcGI4Y5u)???shxSo& zvvYUnLm#KduS?UuH{6+jdwaTBJ0E{K>HhJ!f-0ny4p;_7V6}ecrNEhGghCG1Eb7`g z$|2|8K}ZELX_pBLU2~a$DX;JKrJaJ-UF2IY_r!N<%k&Q`mV>v@esH8pxuEF0Fe*MD z;Y$t9x7WUGbIIIGe}6_XZ}{nqBjHQq!OJKiNaa79dd8xp#2EpV3UJ_NwYv~YD3vwe z%AIm%GP?^}$vuMkvaw3X8{z{pO25M;n}p7Gi>1W!;;{3e<=?NN_WhFJYEA=GLa#2m zR;IRtRb2`8BxinyML^F@K(*X#S)ys_V7SiM_;@lYZ@tH~k!rw(8*zLr>cKf+gXj|A z)j_Bif_a}cYfI$7CWQvJVIi-9Xz0Frt0CzFG^mKV)HwGcHDjlL&Bh`4J($2&aW9;7 z*e>HXt=dV=#>uzvlhvpKJ8v#S28e)!DBLbXWS--aO#{Mhg&Ui*tq_|C4Q?AfldeS= zo0E4Pia)Y2^< znmc(cEL5CGVSWLpW7u8RL9pJ6jo7wN)A3-okUda4SMHjeKD1M&B6cIUtXzrOs;bGv z;d-`=;*`HyJN;?}GpNuWYJjqFa8j!2Mu2Xvm&|a}&Y$m`2fY2BqAXU2fiEY!OP+tQ zs@(w7#Z&i5*E(wj4`BF_KE(&+rLuSLBkl_U#NeC}?8t?bncl0ujmdau`EMK=g-3vf z>*y^a^YHd&i5lm7dx>6C2iUp!K5(UEf%`Q9fHv<8MoKGvCK^kG}?|%ioHiKml5= 
zBN!P@J$wra4EaFlLcj@x8%2Tx4Cd(QTc6l&Y|mZmRb z3GV$A`~Kr?FvUZ>X?S(x!-~dF-@_$^oEPrLB3N|sHCDNFEg!S3QG8nfZd)fN=?U2O`>2WWNcg1Yxif7J9S?V`?eAZ5D%B zp15-9yu#uBKvj_LjmETc5q7kubbh&ZnYo#3Hb{Q(?*>SaO$2s_``y^d5o-|(c+ZF@ zPr(=4rQQfIXijszN90TM9(ji7?eNS@v*SYUpVdw_DBX5 zATT`UwCdn6mNzv%SZhPED~A8+zx^f3OQgC$CJT1io4Qrc zD}>kS4^r~9a92qw{QP2lDghcl8hSC%MW5;^%So5Qa!TE5soXfZ?7gK{F7&DM#iCD| zd3TY&ydhlsVem%nk2~=QimhtR-OT_n0r+8=u;n;hXH{6LQoY++U;fP^WL*!>J{xon z@Ig38-Ya;K2n!8%$Pe#8^@p6naZBT-y??k&Anqo1tmNR_KuXoXv}o_~!-qfRHR@~B zc|lKG1BOtIv&oQE%E25D%-Z;$9HE<5`DQirZJ8mU4EyW;sl0uwcf?AJ0%bdd@{uof z5_JNSx4hk%UmvAzm}ZwY-o=7Z*c=%3!D!rk*S|kY*~WC!hbVS5P@;=3(gm{~YBdDb zE&owwwJYIfTW#zgD8y-tti!PJmIUyhOIP5MUXg^c1<~iR@9^lPuVaHvg}n}U2LZpg zoW^v-K^_&WSK*&EUMZBoL{4#DcAEFMQg7`(A%i*h>3dzYpI|-DhZCVUV#z&P>j^!u z!`8rzcd+Bide=gU;k&^4c7$w(pFOqs1wgCuAGRA`ye+P^6*xy>laC%hK9IVhU`u>U zzGkShlm3QoKoC)0XevwXl|!=uWn5aAZ9`v z3DRRqHg1?2s3)+4 z2cBh(V4_(8sB93KOWH+7PF8A=p03tK=p8AhFbH`3=utI5We5J+hcS1asfkaZk%Aayv@tAI5a3x%Ix>&G?NnfW+?UIJ7PRJK6f z374k&L)ec=^gov-0g5s-m;)3Y4Aj>Szw_|uO~Pp07z!D5+JK^MOI-|JUJwWF?S83s zE_H~2g!FU(L<<}&z1&YZ(31w$p65ylnkMMv63_5L3`+%qdDx%Z51$~Oj4L1ji-8m5 z8hW(W;P>9LAVKh%>v?De6|P>#n}0b=A{o`pm{W)hwgL{MogwiI3?Upr=`-TtSaM^0VM9pdKE@XMYMNtiQKq_>s@7fy0UM} zK4@6F@JnO?Y;N@|NH!9+ix<(JNISgMR&S~>1s=-;D74mOiQwDjo{SNreO|1- zxYZBVbv)b$BY0%@f?qq1zdiZ=M`-It!9Egr9H80pDSBVekG$k@2L(>E}Pd$+T8F%i~ z%PLjnw*g=s8LGn^5LAmoTx*LvF#91V-f~6r8kjMD`sajt!_^Bdj5L;<__k!1(`NWr z_+L*?>e3&DH`hOeb-KFu2c+>^4Cr6OqaTXs2U8*bfP*mxn~9&WiD!=-Jt~1l&e6;P z0+tb|X91lnbCXTyvXGT2;sE4xtDYBaLRu6E6tV_o=!oe_Z;dr0FUx!wcCLV-B~oQ5 z=@{FzTmAYfFTx7WtZEhH&&K|ORg`^;*qQ+x*qW>iLk)oX*#uQv6hKR8D0paPtR7*x zkm`Pi+H@*3fk*PhZ(&~O(!tCU;Xe=$g+CM2BclKgft(P5SY+@M{G(O1_F19ULZme?K?3ar5TWyv4K_W7?7? 
z4IP|x>e|Us9g_!m-SRZfZxJ!nhTeKuFhrLlZMHNRknic46CxrMG;Bsk0pIG%Y^A`W z`%K$yXx}I~tC3|@1JHPs;=^nfXo3weV_<~9A&Bvwf@dND{mL+t%P@WO1=^Qn7Az;uFD%~kv zqhnJ_NgD{#QUa0|pp=3%!d3)CNhQd;ZmQ%n4V$T3o(9T}JGpIPtJ6iH`>UoBJpOoY)OX94yGG zpC*4+)Ic?y?ux2C(Tq zwZlVQ%|@(|+B_yL+-D}bf8e4CV5PJ|q=*VSwHoxCSZXT=^3?<+CAkh&QQ~H(GiVD;Wjs7Q zOksB1BjWIt`q$6I6cdu9F$MRo;+Hg+skY6uN5~+U8ujL0cD()ZC2>b(KAisJHfJN- zjvK9C2my9vB;>Iq_9pGvBr({x`wXDh3!NaqYHU6@3GUFEPR4PjY$5Mj2dC4{-lC3) zk#2>~FwFGyAjpf@eEg>;MkmjqKM<9$={nkW_R&W(h8#RR)7jBnZ_qnQgyV8rpLnPUk9)5FD{dMn(yt>V#W8D zAf{h-pEdrRbd!0X+12&y*CR|&d+3Egs#RCx!ae(x)-jZwemAl1_~G1)?u?^Bmi-M zHUd7+7rl=D{tUn_lKhj@wiTapLx1) zGFF!&V^tr!F}v0F20bHU^-~szN`uO znGNicV~u*m#ih-uW3@YAX+m!hDhc<^3}dlLk~EbjMFwCK?JSs{X?}WSSjp@<14VQ* z<0SoU!I!)$Vj-?3%FXy&Em#jvsC8IHm_WHj9*8(xF&F4x`n;kEl5c4%E3|1Y@K7AH z&W+0z6n)q8(HXkmgGw3Wz+8j96<^*Ik8lD$S2A5>Y-9bCI!TJu@F$*xJa2a3{mj?-0?9V1;lJ z1Oq+Lh8q>J({J!adaChSbyM8c!F{AsXl$oM{kdz1nsXCqyVw2ES9vJ6HcxL~Js|sJNDMzK zg~#OU*}3=M4DD0D7Zay_7Yx(Y0(L6c<1kz9iebJg{g4aMGlb~4$XXRn#9m&@Zh2)w zeX>FQg9HTUwP1Vez-*6boJu9bxpfIatiS2IE5H#q5x!pp(ov(M=?Yf0BXYU=4hLsN@m=$E4D$*|t62HlrE zq@R@d z)~Xp%Gbb}39wm8>gMvZI{bQp1&Oc6`dZ2aiVE#QJZsoK$rmur*vqt=r2ji3LI76C7 z92R*Te_2w+b6hl^^B}v!ii~cLnVIG%yGo)&d*7u^i`bqCm;0wqkhNR;s&JzAx^k6| z!JtR}l&?osDDmb-98sPD*_g>GC@lO4w+8IxMsTy4?&D%ma%VenQugMrF@ zL9S$}9AE~6Jx$%X*OHcd4wU~8w<;L-MC^blj9JX-5dzHvI$o53?(TC-T07u})Z2jf zTqiy1IO+yI4vu*8?0t5mUb>~gW%T5YjeB9xm%eeR;o*skjwup1dg}&_!FA|@^{ZRNzulI0JJ1}Jm(lQJT8*f_qQ?7M8fLT? 
zmHyNU2%Z0zz#1MOS0ip_rLO8`+lcy=$aNJ<9$9|R(%~Z;z=;$elF}}w26dr7tQsHv zg+M68I(ViayWpFZ-G8!JDx^>wT2nv4+k1HovTf)0u_~C;AIt5*PrmItP8P#J!fFUi z`|=Xv^CXAv{63!51Q=6|32!j-Sw}gb2Zb4&gLbnFXPdrkT`{>u zhO1A!6}y!6r21HGrBgutbSzuIkCqV_*fkFh-5d%NFim+N&+TcojB#as%0ungghY{UCMXsrn+M!ch?(>_zlYu;q4|0~~z^uy)8U$Uw8M|u5I}Wxn z3){vJjYxe#62O`LLH6Z)vm5Vjkup5{KKeVR&6*ZS#4mr5R$r^xJ6>~6y9;mq`9Uno ze4PgOn`W~W$pZfn>C>Yw)#~QjXUvdz39zrWdh@Gh>toRPA2Hc*q&Ft4sT@p~B*|Qh zGDQmw*)AX|Rcku1V()x+BxQU07-QyGRQf>f$m7Wxz(P+~us=Q+ma5;c0%Vp#u|#~D z16g<3UGwp&gPo_D)p6{<6{LR%esWvg15JM zSDdqWqi6AM5rrcQz@KR589*{FartDTHv)H5w`i@{8IiC0|9iMpia8IyBXuM~15|7-bkCbFBN3ft z*47(l*Be^YU+zOc4F$8cCtS8B$9I-O>jmjFc!o2E6#qZVj9ABiZw}xE9+G*;m-aqc ztJN>{tH10CQ>&a%L8~_Nc54?tH-LHM58{(L5=PIX34w8D}^y%i&zEWJc z4Rg*w8F zOf_Z_?qEQs#ZD;`Qx*6Kj`q}++WRw+j*Mve$;4c7xCV=! zI%5hZt!$PoT^h#Cn@8G1aI{(72p&E|4wby=H40IM@02-0{C`jV z?6XpZD>E#;)4pF!zt`2&Z1lAOoLjxvmD6mNxejDT>mu^q$wzX2tIf*V&A!xT!n^wc>&lSgPyI&{Et0h(GOtBelNI6{Z zIsnaO(@10E%CT4(nLy6&m*WM77wrm-QXGD~(}0<+KoIdXXzpHJ(=smw(Vm*A{E`}x zOI#ZBhK+=|xmCNXA~U+FMiZLfqjDeT65*6Uv*-`hZIXA-rlU;6{x*LWsj$9?Gb6S`iHr}+__T)tXf>^ zSZo=(f?~-9sA+6zO$a;+wNUNz1sP?~#C20*(@*Wfc?y#4n*USak@I6JKBL}BGmbUy zF~>g71$8MW8tk=&+{gGQ-A#k8*D-8TZnckWc%;fgRCUVN{zogs|2Ct8$VeOi{N%+G zMh~2R>_Y#WN#`#D_fKp1JkXL|9d%kVI)p9Roik}-G{xlfAZN`v-By%fEoS)vuLE}Z z6)=VgBne~_=A|L$^nLwTw}vry$bRje>92;hL$6&|)U?1(k<8b^rL-ZnU8SLIK~5~* z6EdIdUdY|^L>zjP|C-mINQlA;UWf z)33}_apcCMWF(YK5=6;XiOgC>3YCTUeO-gNRVpa!`M-!uT?$BQu0CO1eedD&70V6! 
zA9RL4&8R$PI;*AKS#`AWib3TwoHOXw6gr#!Q~a^ZOuOUWk&sjy^~px{zcy3EN*^3&H)FrSdC@&-v>Z28 zfIUb=RVpE4+y=c#oz4}rhZq0S;F=e@pb|MY~If@aI26TY_?ydi<4z@xr>1D*BnfVYjndP!x`@y#NYoy7R zvs$jq(sYX!K|Xn5v*b}Y;_G>`s#2+Z)nfO*e*LQdf$p1@jc z?17@$7m+`te_A!5`LBypVkfXY%y1GgIE|{g2kr02%MKJv(4s7|m6&Yt=)ZpBMxJHy zyns_{xX2)y^IQK4)2}uxbg-dd&0t4|o*{m)^$`N=KRSH$D2fxv18KGmP=%<@5e*Sz z*san-){g=B8-2~0N+0E;ACl4sn8q8v($2Adx@Iixpa%pWpysGd1WY4lJAf%p0kglMx@S{*UK#%FC8a!I) z44%sL$tx)TPZK@xbQ*!$eit*ca;cn5S1Z=`OI|4m{35%8%il-jcCZn-08L z^EK^)Tar{X=7)A~-FlFqpaeG=g?lGJ$5SAHrj2-BMT%w&s5S zsbQYoL)|Epma@#Dr{)M)FQLd!aP&%r5%ctT(QF-vam>t!9fDt+J%DxC+bEgBEDEkS zU~J$5tVo=~fE~_%Qe-j2NZy#3C9pn-fwCyQ=W&Z=Mgx-s5YO0f7u7%CjB2g;Hi7cs z9n828y?ZJYm9;G1~i-U6cX z3+(vnfnWj25<80N`#Ln7L?<=kn+dgND z@+2mlbA%c1Bz2Q-G{q@_Wy!c!IbH_=i)MbppgH&-2kQ|V zBBzu1@p;=X4}(`C@MIN$V!;@_CXNSq{j>>IgDQ{%F7MD-Uh5GdhEI`_o^4Rr&3%e} z(g@fn^<_I@QXR=TiH}t95rc#k8@Y_1q{Jft(L<+K=A7>tbq{D20%!J5z^sY1A#?{Y z+K;5l!E5Us0s!=!8yPrGzOWBK*7XME84qt8M4f6IY`Hc=2_|Cu=_)JLZg0I=#Cbk< z%;N&pHJj&K#h%tWiaoI4v3N7ZR&?9fRsFGEP*gKlIpvQy@ z7g9M8w(r3D<1iSCI`H)8f#tZmf5bK@{bfJ+;Rk-?ZwX)qWcCN`20l`nbLKqO3}%Ot z(RM0f4`%})%pmAvegSkT+ZmaeO$~jCs~EARkPF;>m-q&8Q=EeFEWE}5u=1;9odQiX zGwIJ?qjQHqgZ>fw%b}n!W6)^c-LgD@QD?6>G>cFzMZJwjWq|xim;n6>*nuj~2P(@*6Gt z(T8|7JMK9fMgE7heR?@?IjgQ(HjY{x;JQ?&Hu?>qRdG? 
zS4WN#YcT5(r^%9$LX8+np{apk^`UC6BdnTwex0)w_TTqcdHaaYW=b{$7jP2V>#E4@ z?^(&dVT0guybV~4ZhJyoK;r9FutI>=V$oVFLC2N8Q!(Npyd^+b9V503aLNDny~8iu z?^prZ5i>K(2{$^zY&*8QtC3w`7)!1^N;^D%;dy`Ex#L9`vLAst9T45Wh>Vxu!{k+2 z!N{G2XlM%YJKD|Qo}G>$v`Hob&vP92cAn6dlGe<&ij+>BQj|=cmc-*sG1&pjO*m+V zl=h$XQAv>6;ri}3vyX)1%VtQm@JxAhhndh;fqQVWDjeGFA4WKs1guQX5i=VpIVabz zTPO$C|Ni?OLKC2mQ32w8OlTDFyQSnnW7;7?RuvQurQ{8q#7Mj{X4)WYP#{AEK*3Et zH_^_k4SQY5Ka}aCR)}X4cQJ-w`WJGS(i3OjQ$q%oAwI^}67nW+ZQWx%wdcrpo& z2ReCYi9a6}Ud?^HDgbIet>7+S#ZqlFS{|J6p}W^=?a^vs(Wwl5h1%-z!S2@|zfb(J zde*#47g0Xv+1vdau_c2W`Zor63Lt`gM!BFC!mX|R*j!`Dk|o6R-vuv!f0jRh|E~MA z=klu?J!44iBzh1PAnMU>!`A>=jSWign?Dx318<2n!=$Dm;Xa5N+Y-v)Ntgv-c&qmY zPvLQL)f&<;kJd}PgvoHh{1b{AMn6;lH6dm5UYn`K6cdymtjz##<1Nk6++w#(7%n|w zv>L=Lm7dkKRG zI=N364ymA^zYs40?0Hkzm5Z{#7^Y@qf}QXczl3i5=hi!pez;2UV#yf0#|+~JHYe%= z@B^*(rv{*%4-+|`pyM`lgf1l@zwIlU3-WiVog>X=sV3O;0T4L|kof#lO1{Xe2xhR) z>^30Qw{RCc_r#YJH>6-%NEsx?%EVj*TPIFnSOF z1YoC^wqkSsU;eN3%Yzj#7x0*U?$_a9izCVQR8(z3L3$*Nq1YP*(gi-rDH=rr39#V# zY7eay*MH&*jN>34C2-9J^R^IPLceh;YyOMfkqaP&H!H>u{G6-!M*wsD$l|OpKLcP? 
zNAg0d9v8}iiLy^~YAdHXvy|d;;zEb({!`{h@!CZeIAE%(VRbKaaQ4i5E?mYn(IcPE zPLHP_aN*uo3iUxIkKL!n^9qq_Q68Y3^ zGLc{hY1FwKUUEG$D&u}}3L_Kqt`lB-UxE`7|NbyNsx`e!;eWASuhnQ+8gQ<|8R6`m zjlK60icA5}$%kN|zF?OIk=m!WKQ2L#|Mu>bQoP&5@R+M6CUxqPPd7|)kc&eR3&1e# zY>^T3S}Z8@@}eM1czMGbWma?fTyl2nOZA+B`oUz!<$v0v(w!xbYA+(n(&!YH}Gad@8onj$L>cq$fIdlKOj zW3|@F|7-4xwJ(=%&EWMnS0bPF*Kyu7IrNv-p?8M5kPhwy!-kZ z{_Kqi%}R=+QYr_xrkCtn%+?i z1QS$^b)hoGMM(M1P0da6dKA-pzc4&Qc1L-u>7Gr(IZDZGprSy5z<2SM}Accc(7dk%1JG_qu$sT)Z@ZEx~hkCRR>z(Xut~ z{I?uBwj;~!;Wdd((N1glGfwC2*5owWHN$m9b@#o?{TR3Ul$YJrbMV;5tepDlcZJ_G zBFdwT9KwSKYiinO)4I2uUBb|oc3u3+A8GqF=?hAGb>RB=U%9pRNrMji|Imrk=UX>F zU1NFgzp*nYM76QotUhsYiwjWuZ)9!T>kJK=qrsHO6hYu-*!V65C`O2-9FgyC8M!?A zEQqhPu3IVcKS{NBR$Sr)f(twHzlGqgXI1H=Gp5hA0wk|iw$>-!IlrzW>7^l}tQ9*z zE~JfXETFWyk;n-uKiL)gN$t7Ke#O#g%b$X@5ZcH%_@s{37)PEt@iY-Mza76G!Fw>l zY>=W=ODD|~u`NRJvE^DXv!v-I$pvvq(^p~gN-VN+{7{dYbyb97l*_50 zBStlHF+TwJ?(SxsLhD7$AZ(I!)VfkNa4KR@c1id7=yCMQIkW*5P=i(5a)2>nM8waG zoN?5xrQ@HfB2UBDhaXFqwR6_DjzG7WVEO*VxGDON$K(4lFJ}oI2FcfMWw;ULKo412 z+fQnIYsZEa14AjIv@PJ_f5NjS>F-Y4aY>+?r~eHk+? 
z!Y)+-r4LxB`9npaSsWn@B4}fj*h6%VQrVLHcXLw8Ntre|C!>;(=)gKs<30T+XIS!8j&yo+4urPv;pLf$rE49odI-n z;xT)k4Vg|;svin{0qxSlsbTqokt$IKr+FNY_N%Chx0bS#7p?6B_;I3iichgZ?ePN` zL>;Ds2{(p;_By15WRz*4ezgCpSa|NjEkg_MO&p!*-66i{RZ+rYwx)CWD%+NuFRR|qpq=(R$X{fZ9}Ry!uU#%B zH#QC+@MqMD?;f3fPS%O#DP9AeLqc`L=fY=8Ubv*DF8S*)_4=73>#>+;h0IF--xmK?a5-wm zxr%G?tqosr-h#6ym_=}-r)EwNq)ZZv@0;-MJXfQnS| zwYg8}d$=;5OgncZ9(VxM=%*4XUa&!t#by=$@`}c<`e;oIFGdpw&^gySDMgnY+m#ya zvqnudo)?{*obUze>mjpi*k)1VM^-?-T^0!GD((GvY{a#t|Q7Br1%6-DcLqgyQQzAYGGIc(lkO`gWG zxAm;7$xQT1Ms;&5n2z!H{ELGct;h;~emw>RMAQ6Hb15PxsJikV*~`okDWx}D?p8OSm$?n}qd z36x^?IojXXCGZ=~cF&atGMovt2lS5S)UGz89d{8--F1Q6#FTCA|9Ziv_e&_Y&vMKCfj zJh(Xxr5*>Y;KCN<)EYair0feDT?J|5)@|E%h=_z$H`5L!?=8U#kdqE$GDi&OneIO6 z03G_z&j3FL6*bVcO<@R|+%z)=ia%>f}+om^1&fr4=z@-iDrRmscA-9kBLVi=9_0U1c~_!n94dh`8v zigjK^gmA4^gE|7Ho#&$y_oZdEv?ADOPhZXkW_mK;@Z0LuL1@k2c`sVHa67;!@44Vi zE_(_r8CMQNTobn1gFJsK6e)+8n3ys^K9%1wV7QNE=I7nX3IF!Z-X{h9aZVy$f-=Ys z_%oCcubnJ5s{u=B*6QuJN^aWXVh#1mG(bDc39cx!T|`7v+K+87VtF9CxH9bT{Dh%T z7aGt*{bEcP-a}y@cQBfXF0VcLAlTqrZPIWDfpF z=C90B3u0Bw;JzLR#HqY!W|Sw_Z6bZ{p}-HJ28tb4jFueuDwE@Ml6z;Qz?C{1*V5eb zxXSJ){()zlTia|pUHJ-?5<8k$2aFNCZ-s>=XpAA%Up>!Qb{>S(+cF8@Zh^$=Zz)yr zE{IL4Ln9I&R`MA;sNe}~`pp*RLF<{B_n-+3gy=9MiWX(HHqAXpvSe~|X2{R;k<|QT z6^=~nIMAoca6TwrcM!c%f`k*fanAdv z+s0VmpLG@!)f<1B{G-;E$5&9=`NJi~?HKCd+w6E4Q!e_^^jjE8yM;L_0J?H}j2!N3 z_`S_6HEug~@9=W6ShVTIJL7>xiJ$hz^rOsS>FBC}xsN>T z)iQ52p(Km}b+Ufr#+|wz6Ay5Yn$DVMu4iMjY@@azxUjTA0maTk_K?jDUs;QfmcP5@ zC#n?06e)RrGIkBcU+vS&67in+t)l}bJ4O=^2%5IP*LuA%#nAW3*|q8tB~d3H*t?J9 z?g31UTYEyWDEmuixH?;P)x*;h3#LtS8ILmGcXD=?CA%gU*1EANo5Nw+Wn3af)fD;Y^c_;_wk>tyCa6jTSHqP2%{yLn2 zW~AoP*+T%#{ZxP6Eegf?Ex3(OKzm3A<>27ZTtd;mk~VbRMsE_k=RU!__)S67C-1-sll&|4C^-$&CbCS}jy?10K6I`?Yxpe6j*{r2P z@hTYZu1>v~1r_*6;#;m{;^5xQCTiuA$U` zO-!0Hkn;y15=ic>sj1;lzq@WL0t}x-(Xre6{#vu?z-Cli!59%!CA*G}regosUOU=h zN3hVt3sn#p<#j}(GI$%bbMZX-CBj?3!~mcH8iJD`eJ4tlKW}&}{XgD3*yP#e#nqm1vVNfbA)>MEG9Ooe#y^W{$?zlB>V@Qu1O!^U$&MxT>c2cNSUlGT^HNQeD)d?x&Fy?~fZ~;l3#lD3|_I+rl 
zQRSWCP5!|;{GZF0O@E1x#Xf`}>}r4u)+?fQEkSn7#x&jtS;6@Ls~_f`E3bTmnK7%F z=M`}=-@fV7X8R4`%ATs4UdTn|IyS`J;njxAgM`Zun_ z=qg^U!S#bBMn3sp7a|PFfyc$eLgbv%O+sgiDL#|~H@ru7C3aylwpCWzu7)T;NVRp~ zfI9sL8I@>AlOxI@!)YOS;GD5&g48*PIzKA6&{xOOJwI`A?_{Gp$N1_OsJkl(u@F$#C}(;G21KVl@b7>E>W!=|n7WlzE7vWcSPR9+$9Ksn zDhd{%j#BfZJqET5>1VWg*;M=NbTq2=V%w*5i3I&I$$g4NnBaq3%qY ziHeJhGh3V^fd_`P&jUrX9+h6L23qA=rLd~oLI9CfO=q=E`CYwgZ5+7wv?@-K$azVt zcLK|b+%LDJf-MhjudTd1RAjdd=hrui^x-I7`^2n$)_V%*)4uL)$if+Uf71?RGhY?& zTq6DX>~mL?m&=og8k&W6N)8dFlTi38D4T^4j9+dd4vWOmH8GU%PlZ3eaFVWUE}vV+ z{qdD&bLWNEa`6{-plp{Kc+{-i)@aUX%{V3~-GgB|+%-8_*_&jbDp(x$1+~9K&t>R@c2FnNjPQX>_m8!-t55FlwTS+_2qeK- zaB%-2&Z@I-Qw&ZHMqY{eA$p!m$Y3!ghb>W|a{~E^*qYe6A9NrplYE_}t!q#L-oy_# zb+CgF^(Ja;v!%sNi1Hc&z;TKrH^Bsan^}q0I3!QE0OHC60U#3=<~l$O&;&@(FMaW9 z(&*yryRf~H7a?rEvLW9@6(RWg1Y+p}tRWhP0gWjG#d;T0@6A=P9l8_GOumUi;nF_# zWc#EdyMIpYBrZCwUZ4k`$IxEM+S;ZY3N^rE9OGsUtID)V<-XgMsNL34i~?Mpy_zgfpqz>;sykNkETEAkh`V}&8B}<17%a@FF ztkQG-5+C=8l#$@Q=0kySPkE)T>2B^5Kl#vqp3}mObQR&+}#BhQN58Qc&mW6(Ozbf)UfS^1IhQ@>h4tRZ{$Tj zAg~)&5^6&1bf11Y4fEyQVX*FE*eCXHBEYj11=e&6N?qRh?$NOxMfOXSlh+w}EK~Ge z%i+O6(`$^YNdH6xrR5G})zsDsEYxa1X|9h7E$qD(T3!Byw(Th3D|_S7xH!$!$n%&i zE!HrAay0^NslOV(NzGvbEel&yy-4*2ePP^DUcNR-pHc#ta+06#Wo8phxZJY`v_wH5LR(u}r1qOM`RUiR)0KH+@f?O+f7|idrAR>uz z8u}Qvz*-eCR7g{40YysZIJUnriP%3R>s-w0BV@@N z$qI2t=FcAx;S<+AdRoFDyJu zA28%FYJ_r>p)p`LZ}I3#jHt`VTy1(t+W~gQhhAO|C*ie_gE*xV`~G;(K3jav*SDS? 
zaUIWa6_0=Y`eqfypP{8)KA6@llojGdzO5?+B$dpp>mk+|@Q;zpL7Fq@hMbcPgRvZ_ zEzq;4X2)C`P^=-qp{vw&e)e!-|BOKImIcK=`cvUi!NIZXs852qwG-a@FrY1oa{9O& zud^dWCgy|N-ab6BEf*IPRZ<0BfbW>*l`=#zev8y(Ky>i_otzm>^}JS8R76L2^j}no zk}Ex&gi>H)&~w&I=NLifTLDeeyEpPxeDngvq;T0-~&T_Kr0b zN_QGM=Zx@0%0^U=Xbor|BV{hnb-6_c51HuP4MoNqC?*Y8H2sALc>Bi=2??DH7QTAs zM<@6#cXfmrJIq#%4YuoUqf~Ks^on#$;pdR4sr^jwNXVZWF4v?LX6p2U%1f*tYr8S; zJ$9Qh7_a^OZ>9a}^pmMN;$*R=KD3~kX^){So)4#-c>cRSe7Jiv3(et(*|~G~l#?~< z=FqI+Cr3FYw??0z@jK?$r;FAS(!-EYmabRdR`Zcmv5cv@lcb7IQDS08#@Kysy07v5 z-jUb&?b5cc1J+YY<)w{$zEMq95)DHcL&2r}ta0C5MeS`=QCYf0YF;7+M1HDD(6PKW*_DMe?D z$r<3@6CebhfN%ByNW?(v!zQ<6JitO)Lg%5^CGbI0Oh1yxT*+>&boR&|*-2OS0M)aV znIAU^vbOG=o$`qJHtX>y1|=0pg)d^_sqMoYJX1cBp2B}|YuQ>)jTR{!%Qg1B5mK!w zS>kKM(Uadlaf?TjE!%PXPTfcDR@Zzzt-5D_IOeAJp>lx$nG_^^)UmRG!8jX>cgEaH ziUpAEX#tLQ{9Xn0>`|PI)Om=Ep;gYEGWlC^zkfso7ldQH9Rw!1x%EiJpA>5&vZs-> zm}G8BHEA!}FNvwIhe$HuI{o*|coRy~3{Q*uk6KR>iBJTsHur}fki z-PwHC^0r;$!FJ}#Ym)cAT&cS6%DU6y0DMv!3%cxrxHlORz~|oaBO?<7+#1h^og7NU z4NQBL%0~&L$y20nwT6sAl30SJf8-fXf*9i!D*HQ36g7XFnLry4bT+nrZ>0tS#`yp9 z&tlqJ?d5uhUJPUlKp7GbA)`ISf{?7m&ZRus>HD{!_}?xls7kH~69)8DlcC)IYa7KY z!o(aAKtADhRWTy%;$1JXY1r6wRNMkiN> z^}dVW2DL!LsvL3B3O=Fjd4Pfxabm@MqLdn#01NQIs=biFkBgSB@;`KCO%Dxyw6=&q zVTTO(I6pt1fO8}f($)<8?32j-I-`~CNrD>SW6iWNpp0@LXFG{uzzRH;;V`$@ghB#~ zX*OdY+dET3%zy?$67>q+S)4@IzK3=~_)FP!5FpZmCMG7#3sQFF`JAAYUe=S)P#wO{ z(u>Dgnxb`NK6ICfZ`h^CUlfHRfJg($?n5zXq=(UChMgxr(prTMs8JuI=G=w(0)GIj z^0PD3{}C`EUZN+VHKuUiF>*b`jx#y|l|0c58rFLky$7Fq3n7GL&3|)D`Yjv12?A$R z8Ui9qb^|jauKQ+7TiMVe0dpua4$eY#qYKehBA$(#bh3bA=?9g-Ym9^>geoBZ0DV72 zaQckDtMN3T@xv!)BOQ0QKCPppV_3^}-9!SloYB zD_o2S!ZjcriNxS`$GDCUeC+IeP}j?}dJLMFZP>^VK!#D_1Q}1O1B-L%jgwb7 zXqTI$4z!&ZHkeDy^z&20s_R{CZEax&4%m%MpzlxUJc5Y{3Zf?$HZ%yNqoOkL5Qmu- z(5SHe^#9H>$s@FR*rq$4_x?zn`N35b%NMki=)Kxvfm$XFpQODx08a%LSw{Cwd7ug` zhbPT_JUzF5MiwQ!Sq3HEOzCjW=)tUeaBq>%crQlnWpOf<(vldUqTiQ@4fOkx6N4;J zzRgc;E5QI!yv;WZw8P4|PkifztYhQi%E_icV6TbB;6!t)67hVT%(m1+_OPR|eI~C#(Uv^Ed73k2^wV 
z)wz2BK!&_|a~w+L{WLC+et9X@x`a4EcOeVGYbW@q`aP6ll|O~v!}3Lo2rAIA)tefY zEY*b0IA|H0lN7<%k{2HE{E;3Z=H@%pkW=5I*S(iLfU)$YomzP04js8Pw_`f24FfF> zUEhzCmwu}I&>uAFI^6%Mzf*5wy7Q&Yw3%?z@P6m>T9cZJ2RxqQA8oV_xN3Or*(!T& zYP!flODsG_l8I6l<@=$(GkhSU^SQ5;$xC53#sc*{MRiiI^Un(@uwrv6EpBh{?D%lq z+b0Cj`W~3;l-cIP4u!A=J+ljQ+`WCCBO~s?q`AhLj%EXpZlj(HitFans#i-Kq*lBb zRRNIY2XcFN`BDjlQC<+^86RMN;!clTRnMQ7mUa*F`Jra;$B-WBSOx+?P02txQPFrH zH^-!QC!fO+0ABkTx&Fjt1qZ@`klrV8X`w0X<9UU`H|s+FLm;Oh3N~^;>7{B}i5}&_ znWq;{VqLuHahMh%|c%L+5lb#V!MaU?bH zR;SrDT`#@vEiIC*Qi2xhMkSHbZaY-iooc-MZFnVA+xj;emb6Joq)+*8_0XgQVbn>x*ZRe@0uRw#InEuzEz~vFQFV)<7Oz9i;!vk8Iu}1}uW+{-9H@ zqSaDO21M0jK8*BPEdzi(a;C@zZG*Gr6Oe71HtcA#$}g=`m-7; zoKTPmX|Wi#mPHS_f4K2LG{*d;EC2KOj-xT!?|U11BgLFgVwY40_3RG@-C8#7 zJ^R9D=g~fT4Ys}yuXT>=l+f^g{|8+w~ux!^=Da>sy49~sa?3ZLjK|tpR6xAN8}5I zQZ?Q^)I2R0v^!a4QXsYAug4Z`cS9@F%M5C!X1c;YD4M*zcZo~4P>U}dl}4D^pZ-~h z$>ANEB|F=|o4s8{+2n{Fxn+a|*g zoKqUKT^%1zVw&fV?k_QAr5Z3bxtm8%xu(%tch{b<(|t?;|9oISz@t%y{D& z{I$M_%*sD}WI(3vf9J^?}>QD^T-w1vs)d>duRs44DJY8fg{o@%iXZ?*;0 zFL2h_0+j`S2kO?a@Fa@0M@fl8gW$VFmAFDK^-R@_K6xdjfSk`kC)3^b4z0BA6Vso( z!crEyy0;>y_G)U(mqkipY2u0{rLXheKdWeLDLv}IF2}3q8u|aH2>h*KGS|+$_0wz2 zcXEKe2n5q&yu{Z9e8~e?wYxp8Sb&*O)~?ue;%!_mnmN;*H5jSXfb*vz zg?gZa?-_>IDsoPR7#R%b)mAo!7D)6Ngbux$G!(-|kx)>&OxrS`Rw6z_$MiuN5wxtb zRo>n{6L6^v%H7P565d(sNJWWxi{zXufM#D|BKrZ-f~L*d5~I^fN-eRGfC2eZ_3{Fs zFqMNC3p?o#a}GQ`pi2foO)v`T9OqPflz+UG$iIOJHV+dsVJ$=t zN4))x#aM&{4-c5P2*1gIk9G1yYlbBQ4pwlT@hzW zMb4PUL-j3{DIy-u-MWK%VU0a~xr;Tj#u)i5+qNHEIIQhC^G>J5!8~Ys$aTkohmZIl zwx}oip3eN_I@0!?S;b5~e*FVfAzh;)WsfUG`7<;&<{k@cZ`a~$<-owAKWbnzLq04c zwOhR?UC{l9y`7dB1|E4aUtUQmw!xfac3+!?Xtd=+IPel{bimLNNAS=X0Luz%0zaSA zDZV7v0&D0DH$<30AlmoDY_Mtob|=HwYw`;Ii~|m4??1rhws$8gx?7lo(AUI9to6#J+VmsJCE3(5y`dV{O@r6T{T=rJGce_jH!ppwyo?LnXi|C=7V$I9M zlE$QIWmpFhWP(Kq>7@np1XWzFvuk0vYlNM zLqOFk&+v4(w|(!k;{Wg>nF)>*gzQEw8-uzXE*wx}b2(c*-IrW^V%a!rkaVX=X!wba zdNRti;eVSmIR( zSD~lP`X~XBZbd3EgDy2thpqc&w4e1`EeZjT}ENMoAC05wft>%a({ zr)%&YDb8Cj4z*W)&z|@p;=%p?gk4v~e%-Ze#lQIzm7_rSpQ(Hu?cKhf>^`2SU?IVD_X 
zI<#F-L~-l0JTt@RaJU&Wdz1Nh(q!f1v6kO8cPc+FJ%FuTR;eCGr4Reu{OHIKpm1?p z)|X9UFOLq(?ysb-Vl#iMS)Y>oBTqX)pW|BlhEyg>Yb)mGx8*rBbq!wz#Z(UHNx;sc z#9jE(b%Vk+o>Rk$HujR!8Qtsk>zZ{da-0KxTVCckJ#os_q9sjbtaLDldgocqofM1d z;$(G_0m6>>BxF5MzE-1|r90xnii^V<*2Hx~YiH%4RSMHUAk0|H0C)*xFhRaUcs@S$ zP;5^TPQhsBX2iVfq!iJcKH)Z1SA7<-JrKL*i9{mfGeB+-;<|j{;Oyhc(u%T_b=#!N zFh|Ot!6azjcpg4W2V!AM!wY+gTs*L`=UpkA^!j*=`@{|Uj<#a117tZ>43kECZ{BQ3 zT^lCq$W2rm*3$d~e~W_rT-6{rpaF}F#Y>kOHs4v|<$B*{Ej7%zRnMnHTn(VuQs(hb zoBoIYuL53K1`Vf;m8y>`EJNZ#T|tZqvJ~fZC^(cfU&YjnEUe&ETc4v6)yLGErQ^ka zhMLCY0v-U132jJw{|f)r1M~vWe%`~o5W&-eg$TJlmHp;&`uwbXDFIkqp^WuIdvJ<% z+k|&v{j->m_WjF~7D=uU=^rzzg_LjeE(7ycz3=CG)^MC2Vym?N@_n6&+V~Gc9vqnm z|3L13=5h7=HcIuztpNhe;|JK8$Mtb?bN@=IdK$z$zDe9u`D1l|0DtvLTdS)U$+>)2 zg$s{v)ldmC|dZeg$FK|h)Y%BGE<714C-=99q6V=tvgG6 z6d@`S6pD8r$n7SeyWf1uAb&sLr=90OgbGZK{mpd*-=ux4CAUl*iZQ(QMX@fk^ihAA zA`^?i5xfd_7cfzBi0;Y998riLX;8{x9yFqiJD|8EB+jpPaoCC;F*Non1#Kd8f05X_SS2tGWc_rsO;HkCseqAP;5PX@Cwp733tts(=I$`6k}HUDA%W5e%&$a6sI#-P ziGBd2Jw3Va zO_~i>EDw#pvhrQH;ICl6G^52o823eUzDf0hy!=X$(wzynl5vysVxsq@&iKzfqCKwW zZntT)TB{nVgaPmJVtp)8=zJN4M|}*`q?q|~CsQ9Z?EgJZ~Pj zDrNIu3$}FaR*AXS32pq5mH&iQs~qr@E$;ok#rq>uNk_^SZ z7P>4WXWIL6I-m~Jf9dB0sa@4O$bZ$!i>Vi%BzKsf^E!Q{;UjMF^Z5Bl7i&&Xd20$3 zjhs`e`0(20EaKteK|nZlN&qKYT>0mCaSWJkL)zrpa4eAB*AQ{eSiaGFd0Uh`5%7}a zBE%pnFZ#3HK;jmv>rKGUcwR+I7X7hDd3Wuz6Xa;-K3@5zXj;0|^u&*#i2r&0scsS) zvk&sKf1E0}8UAQ8vY%zGWtNzw_l~s0iqe<|ai`gec-Nm=Us;;mm{C9c{Y%bhO|Hvn z=k7F)j~_mqmDot3{^0%heY7kw9e6Q=_TiHSTIyi*SMvJ$G5=h-Vm9hRn&>LgK^b5| zoypm4yfnpg&PEbp@cj=7nvEIC9WB{bApo+#-u(O7Y!xo<`nV~-8+LXx1O7!VN20)h!e1ymFT2`WKy5J?iX$}$j@ zB+&>+mYjouBpeWs93`qG0mZ0~9_A=&Qnx#jgOGoh?||Kl-a1lM}L&qo$^&M)&YszBzXM(hh}} z&YpBT^+kWly!X&;M~`~LU<%)XUyl2_Ep^}Qogd?D%XW*Jzm9tDHNL-W^7bdor5LWp zB;$oGTk>JNL$C z0BC$M+>8)ViLeom)Ad8ofu|8J^&Ashhz%@_&&)-%YHLCLYX~|`y5K%44gW0dypJD0 z#{FoMxI<6JAFUK05~-fuAjgnKPuSa-%vQ~qcC zj#8liGg3$BMN~#dGcJIfq-)N9U)lUi(app9f<;;1z6soQQroe$sCsoaCBNigl0(W3 zgSwX`&SEZO$HMo0ea=+hrfD0QQO{lMoM~sPIr?M^e^T{0|B;Rlv6-hiBX217_yk;& 
zY1ODc?)#2Dx2XPH#7KRzS>oCqOq@3s-P6mpKQdytPJLEt>58z&UOi5dzi)HV_o*(< zdpIc%{UU$Yhw!spcQLf%MDb7?Hcr1+YI&0x1GU*0@S}X=nRBUXSLq`k3}@6`EPueCsD#IG_ zlQZ9Rd3NjwL-k--uZXcvFPJDIC1cLJ^*(Ip@B7f9W$M%1RBxLy$Zcv!pi^&S#|- z>gz2mXh5Aj|2`N~T5qTij8simeCNZ<3(F-^u#LwdEkBB9YmedOjxS_DidhAjTsZ`S z-H>fJL>-3q(GLN=D_I`?k1dl1zl4N@6a94RyBarXaWy?-lG`A;x%Z1&Htk_w&1!4T zaan|2BT5_A%i2N_BgJ0t2O~2DY=y4hmEIh=+0r<@US92n44FTFZBL(>3d>CDt=T!* zHhRqDM_^i zf3LrF+cu}<bX_> z_(t30+nj08*$S*l91}^8WFD>Q6JLyt+0g*A0G_0-(t`l49h@PvbgIdGY@q!SupeoJ zaZN)0d!!oiOG`ZD_k|%=#~GsmauxJfyd91LYc|)ul9efp_SFk&ioVSBczUjN!S_u# z!ord``>U_sUVH~mW24sBua_qf-hh<%oO~pQhx7-4^ALT|(t-bL*g02Ny*v&<4iN|NlUMnEG#DWac@dHls^pH;t{|d6 z_n!-pQYCr~FJq^^^~wwM->Bc?H&Q%x4I1Hg1?2^fJ+XD?oAef5$E#{$yvm+^kLF20 zIZA_&M(7lMlMX0FhGfhcx1 zGrjqrZvLNqz*OHo<@@aFmm0MlYpSHUMvlv+Z5fE|4nEQGS8GRa#M#N-`c&n6p36OQ zemR_YIh$iL>0XS(<6m!|d4Fsqm;H`MUSg5Hwnp(Z>Z?d!Gct1LUm%Lj;H>~Qe0Idm z8|~L3ST`UJ5vA3hD@_L0qyA42=vMTJ?R0r(KG8ir)qmW{b0sJbsEEy=M zeM-GWF9Ha=`f1GG+Fb8{fdPo6M{x6abB;SHI@&}uIc}vPv0H!E$PERX%uhjXYKs1mi`vus zN4pldY<1Y=Kpjk?aEcw3oI*mW|k3u0sy&U>qp@&8qQG1Qb9`sZ783ecii+JMQmMVbuQS{?Hw91&vN0JW)ryTu69 z&FtTN@#C*=On#q|pNSYGfj<-RC5ZnvMOgGgW#DfL0#p0+s9}Fu@?;a@l~DC>L}!H5dc9BS%9{f&mFD2SHrqMKZyx<4h4vNg2FqFS_Vl#PfB}%` z_)kHNK|>-uc3U;v9jfw%m;3sm!Bs8jl(h#+O4kyOx5tTATzq9Q?vns9c3U+sQeco2TnJa9v1>v;{6fI#6>Fll&M* zju)nZ97B>U7*A9{#{oXUs*R0L11iSfmMz=EGDxVVT)x3zCL*uqCK^;?MKe$Irck)R zj%8NWN?brVD|_PgtaJoPm#=`Ev2VSk?on{O+SO+#d{ok|$N7SkKDx$r845+fru)?V}*DKunX;wYJz=4RM}ZYmiJgq z8~G!wJ!nwuMq$S8W=!vRT6>nD512-A41g#ZX+y(eT8_=-9PO(JhnF;L@$~+LH{rGK zkj5+#ni4pJI4`Y33vp+5z#uq!gY;%D2E7fY? 
zDQHMPq{;5d=M8rm|8&nP83C(qOeWwhsn0-s{2gqdC^ci?iJ*wDBl-`*@+FCxBmW%g zO64?iLC+xp1Jh{O8(_zf3Pd^S2 z-RTppHO^=wUHkNGao5fbDush7nH=pr^`nZhhXjk-YMHza>VKA}qhU-Lg?I=#$Q9ZK zj{?%iAOi;kFyAhsn)s~~0}V3qjqn%)b8sY2D zLX%B=VDXz5vwwUPvl@rU zc?82UBMz1c<7k%$pO3w)oT-N5gaF5&yQgrVsX#GVcKrE6v0AlbmMwY5k`cFNK& zQ%T@iM3ELNDZCL$y_j2;o!~u_JBqm3|KU`vkSjR;zF>7;OLRiZsl z?HmJ#md0AStaco48FDX z_s4ucyOiljA#GZMCN+YLp=JypNF*IYwhM5wJ8_3dmxPuWm~hUoyF=Jzsd5u@?>zth z@&00#(d1OY^$$Mby9beLV;70SL|Kmseclj`o{y-4Weg|s8*Psh^S_-spR~+375+|I zO7b4YC%_t-R_T`frJ6|s)_S7xBBn7&N#@NU8k}R7`eT!sXOvO`WH5}IXdN&kSl=4Y zFD`;q34io@7e>AV&{J>4^^I1HA|8w|Cs~6a`J2Abd@yWZ1{Wagm%bvX^hf4oba+=O zqd6T`OFP7dykw#*A-9rt()WZ(s$@5Fpmq5tnxNLipX4Z@Lw6uU@j)7`FBlKWQ2}W= z5_T0hb_h(44Xf6z)3WErysZZFYSka|ZqY(Ljq&z&O&7ee2?z24WGl3AN4sD>`)Lt3 zr|{OFe@j5ao3bt$91Hkr&z-unoMMIDc3r$RF@3Q###Tfln~gF?97Bt{zfC~M4@?}0%^8W;gyjx$WQCqKFy?uPr$ChqWzB|& zz0`om$BPL1$S>i@K_4A|xYBY1$D^>!P0b=|e<^g_NssCujvU=ZOO_B39kNb%HOSmu zz4bX(AaVJ_3UC*LU$(xH{a5JW^mFUro3Iglo^p)z5yfQh@51PIt+{hkK}sP_xDn_k zZ6&G|Oh+jKfTbV(NDiT!_PiU-ZAp$|J#ero$4>3vltKUVC(5Y~)|l`W_ZG^3I)X+xM-}x)A-M4)l(Fu!PKJUxUfwum6F( zPXqJZWw$(uH{6yhWOg_V z>I1?OA?hoYD@Sf_ zrw))pno9Efe}D@qqTEo6f@ORq4*(`9e_ECkpS+mN5x;LewFF?BBck~=uv834<2kb&K&lxZOFGG zV>}!q=QyJUhp}()%b#J#7KNzv?0ejuJHrw^^qJ!@zG0kju(5s77W&-Jj5g2`6ILJ7 zCQZR)QluVtmi~+AA4oe1r=4&;6Ju_Uvo}^&?M9uFMv~$16EiSG?A}6X_Z%0Q8&ehwSlNilo z(N!3Zz)2?w7v9_^sf*nKszEqpGptsMDCkucJTYSm9xaoet1D$<71C?KpPAg?6KW{HW z0xI}`_p?0G#0Jt?0LzM>Tg!=>%HtbIG`>To~48y-igt<3Q zxrbt2Pm_D+&SOFL6S_pIwo4`Ddu(|055nLdB58SA=wS1gmt$}*#( zjf6kX^^fl2u6q(`Wk~onns?NXCW#t(5j}siC3nRm1^_U=SWG0-ARpy0A@pPR*$k{l zRgzae#r|MBShx)=<~f4#xw$kXWf7v<9!SGv&_l>zMi^*+3~l=tID zIn?&xJPF;f&9dQ17FHi(1|OfT;N0*s%b}PD9a8THeU!#zd#HN zb*%ai&Q)3-rcIuvY9s|LYX*J_D+}CFy>msh7?|`xFB1tLY*8E|jffkh8tBQ)EfsdD#%Wq(E72OkUw=V*p zjdh~wM?RLbk7%Y5^QDR1$vCozDB2UnWDP~dbNyZaTA`@3>_2V2H#Y=a{@U_N+0+i? 
zY#(sTs{@WyV=|Zh^zgZXZ_kB0U5LgQ{O8L-Z z95XtUjq&gMvC(?oSh$1$K>wljF5GOZ;2PxY;~@7r{C|uCjs6K|XW~&yjQ~r&=+GwQ z$50ajUi3izm)b~eM%vKnkbF7!o^GxkVqor6+Vx?*r7KnlX;@y0yi@>Y5xf@lb0vuX z0z7x9%}0o5P1L2>c{0i&%Tsv@=e7l>Y$q;TB6;d%~qB>wMb5cZwND_yhLZAF8X&w9gD02>N1Vw$aiIMl4Hz`|W+Pc4biN zYUQ9rpTA0_dCQJIA9BolknL_mwcC(rtrcLPl2-4`b%y@F_J#;QzC+rs7dUqm>S{M+ zS=ue#>D}zSY4Gyo$2hBxJE@1@UAhf{vU=AjF<~Kr!}n-C4|hjnZIanz>qhUhrg4|< z_9m+A6F=l<*_vpZyS~lm_|*r!$|{ph^fTAFm-v~gC2QDaL{97aFx~!Z-TgbEdJp*y zU4GWKWphTlwos%E(g{cY9cRNmN%G8pA6Si_4S&jBjllgsn(M8ff}8$7YDMhj{THnW z=i#Gp2=Kd$aV6A-qQ#UJTg<-O{$0-~P5kSb;`AY7`O%87XiT)ExI)tIpUQhHHd?an zC*fj=HA`0(2%JJu7m~eU3Q9HTED7Z5*-2&th!p4?7=o1BmnE2K+bx|s5*6l-7DXod z9Sx=`2sM@YUR1Uy*VGA0|8g zmz)`UW^MEd`aXC^8NvKnIb`$}uXmq&+`9g<<)N8Oz?WrPQ3yPp*da>R(f zTFf5?^%)2G{k85FJs+Q@3cd*}hmo*SmTjuG5MXY>>fIalD?;Uk^Alkrvt?dqt$>Ml zX7uIjeo|NP6APgq>Wbuy(2dCiVB$RS5Ep!o_p>604>}?K(O?Wz_y=i4gISl+XfS5^ z9K_rjlKb)dLz8*0x8!@pN82N4|8jo@CVe{!9zo&dd`42~_d$d#1olWvV`+cGF?|r2 zO#$bQ9WosFM#IrOYlS@Pb@MMF0!j`3Q;GqVI|qPMVyyPPy2AG@6|c4)7PXvsS>gKO zgC?Q7Q%s^T>Fyl>iHJFWN;r#j1pBC!|DAI-teUG!<|kI*Wm4yy*&J^{ID8cNqZShF zaxfw=*Y!R5P;&>o?>(*dua;vqsk`qN@PaVcDLtm37${{}#bWwI$=?R?Em?C_7ZFCx z9R>7r1;#GYD8jAL%!4W9rP#k;J>bB_8&6NCot6mQkTo+tEQX}jQ3C#M?Qn3tkFtXR za^K5ZyriY4=^x{7-yWjn;2gBqPKk%qW()E801JqAk^1J&ON!yImFzrZNaL>aFQtH| zhR+poEqdh-DsmMv7?XF4&rC>CtZxh`UlG|`K07g6O?(TaX&=2o1xzHGy(Dy&$B@J% z>KS6EhCuA~36}fwr-zb9P_yA;Q1?E3L)a%MhzGqszgk-E-!ej`yJUPTKR$>ek(7HR z3#VajCU$H9pB?%)m|=+Gj+2ia0j{*j!Hv^JK)j%kiGn@p zRrrHZ59w8g@zOCw!HpTD|1CV%gFSbtAD{Y#RcvML%*VxXx^MRK_X)Ieg-zsmXdAeB z48#r^CGX}R85y|Wc$+mnw&OqH1Dx#tdc>N6`aiyCs1}IZ>{PtKYq_2F3yqToQ}G<6I`5 zd`GB0yzDXEtR_&84D}Oha*$DBp5fpB@f>5;Zs|H37*MvhAtl;8>a0!YF&*nFHRgRR znTI&;mn>vHK67Bkufx#)TwY;ba5^>RK;`ecRk7hr2Q@oWw4m(K&ikKJ_XYcz{@aAM zp?~V;vmY64wOT1DRQnRhh;cIijCu%u@WyKDX6FB!!K3GjfY>+mu!=(f_U*yIAu6aa zq@%jWVaAqffNoO5V9?D5A&kT#^m1*XH-FKm_tlA+h>2rx_8yxN11bT&sFO|3lljpN zp&#|;nKyezg9}nHy-mY;dXYuY0u^v$@r8U?@4=9{E#N#%eV0Ghz;AyI)sg- 
znlhxc1C06YLrA3v%=dFy@dFPJs_gUowIAQyre^zhSJgJ%{wUIBb`tKN=P!CLY_e*j zCw(m9C5`$M?Fzl79*K)E=*?LLf0aEf8&yxi8mKnII$qnCi3OH#des#V%)Y1%)NdI2CduY=n@u-ZsmY@At$Iwyvk7G$lHzz)53ZZEV4c*TxdMW8R{G1a zu>obF015L`a%@S4JsV7X9{uTAm?3JR=H(f%op+zaNq!bujLY#Qtc*dbaI+3)D;$3b zZ6uY4c@&%`V6gdoh*0h<&gPnAo)Rj2HUu3EMmxY!VTTLQpBx#^ef)6~?EU@k_Y4MKZ$`w17Dy%JSt z>@OrAi@$*RPmVne4*U5hL6O>NsoS6tz%Fd``%Mk!QR}vN2bj|W5n%Ga%0gGLjV7i3 zLa=t?Eru*W3m-?@Y^6rU*GNWsF5;dBhKi7Ik&u!7pH$Q+#b#m#7q50ZFFyT`E*#Xq zw%I0SnCH1X!;nIYiE;mUe&$ugD_7g!mIJLcPJQ2gu0d6%I< zzaLdKX1O?}T1;cm>Po@#Aly_HhgkME4dP*fp%&?`7K%bw%U^?O#^R$5 zW}OlOiq7V3d2CFiSFNZ54x3{DQ3pE47loU;&83SQ-Ns8DW;E1t_?R^aTY(^Mj`247 zUuI=KZayrKE||ypqUWsy1mlrBQ!QMg;o~9UKzNv8%Hig zsY=ZCP7!I_RJzPT@Suugl*sr7D$5xk?h6*G8XHNZ_r}?u< z3TbbcK{a9A)HQ)lniC3W#hAS|Bax&hLC6oH*FL$^@Z{=`AKThIqGK^L<8_Z5>>Bza ze|V9l+^x~s)f(=+BKcpZJ0(4~JRpP{am%{9X84_SM$X9y6}xY61R@iE`b}U;%b<3`#$W-UxV2**@%w2RiCo z?TAtA23et+!5Gv$h(RuX!XEIraP@!ZC|)q;e6jc}ePEh6mq#=8di0lFKe8twW*v~t z=aQ$IW@^K7GKkBfw@$ZH4P-&;XwlE96X*W@u8-ocN$1(->KN4%(R#{~w)iOY2$|T$ zEQ&K+d*w)jw(0lvKkLMak{^{9dL_|HeN3WPpOYCG{P(v#GURGh`xJ@T(;ibjxL>Wv zTZp@vOE*kc+t!$-zsc~X&u?ARo)f)BTjk}^#|^4~gt(mB5+KUeSf`R5-|=g@?c zZ2zRy;8)97T}?*k`04|3XV|HDG(N&ry=e#ZqW>30HvffT78zAAMVc$o>d zWd%5oc9Ma5g!#xssVO~gLng~Z9u%>hQWEjs$&k?9kqyGkyx|J!#eW2yPlq!l(+oKF(DyF?NB@m?(XC~L zffQH?o{;(~WhBsa!#N$;o+R6RQYt3J$4@?z-|J)$B_A4LANr zL*O_5M5q4PCx&-vR}$z&M8V;@n|U)Mm62IM$K(yw&a$WGQjo(7SkO&Dw~1((vyV{i zh-*^k=3Ev&ztdUZdk%tcS=85GB+8e#hb8=$PP|p6@gAAKG_pbd}T;|uNXr;6pX^IV0i zAwiK6{gm|d!pQ);*RNk|g0n}vTaRo9@1mUcqjorJODgd(zavbpL9h&q&{v+DIwGY8 zL9@EAy31lo^ceXy$cfU9b|lpJj~ZUEI_4?QCS71L$OE?9NB~x1K^}3A(AJ0unv5Mm zTB<|9G!q^gM&wu@rjk0?Ot%K!5&FcM5JygL6(B@norq}o#-P~43lZ)e^1EVfI@CJl z;eu_Z6v}y+HMxUua)AxgiebOkWEuSej^5H$e=TR)Om++ZY1*qbR1n@Bp+{?h)zEq6 zIPJUSU+w#dUOzewwr4#E9hm2MGDO#zT-N9GJxViQHIBDJ?)%x zdLeHwLb5~aaFMV-2NC9oVv+hem42y;;v!5A#V84AGCD6OkQo>_znV}CMj<8|u5?Lu97VfUTr8adR3yF&#wBy2Hyq$jo+nxVi(v051rck6{Yc-!y=h z-$2x!rzGdNkcjy#f@GZJxd_-#h=n$%yQu!#h&xyKQ4qsZd7l$ABm9`sHD_~>5)7BA 
zBGfp%&|uH=*iGbM{JUrUqiJN0<&Yc0f7s7&Cu~5x3EKox-2}Z}DDn-0mQo5H#Mad6 zEbi*+3iT>RPBA?C%O{+duX_AtfiYx4&QoAy&An`j2av#V>;*JFA%+=L{V>L+G;EHb z?`%vobu-odZ1sB6?fgQsBki`WOlAS%Hg5;{Mx6h2+J*^;HlLr935erwD>{b)_c;_3 zB6H!N9CPjUx$~(m!#sAvjDp~}q!=~S7SP9Nd_WP7xqG)Yc3>B|01$mUv(S;DJ@6c- z!9d~%>uV-3w@S!x7h>+Q${KO)*nFm6_hsKT^%4qEyEfO>ApT4Fl*s<891 zPitRx&FckeuTP<{c7Pj7BnoBei{$U8aENVSn{>;4S4elec$MWfA7Wo|yY#JZsd-P& z0>T^MYQ8X4*hnbABO9E`*a^E~RfP0y+QVX^M#?Lgw-*q`Jg8MPn~#=&IkJGy5Wz!H z>1$&3f4_|mPzm^;V_O6_2~A9pP)4%%WMa7*Lw>W)Ar*41yp5PV8ei*%E6m8_Ea&-ur}`S zg9i^_E_IC_y|Z@Kk#F^xRFoT>0sJ_@>Qx$$%n^J?6wB^&_94gythaG1-9@XF0M| zGFpXzy7&Z7=iPBkZpB(r8Hq=iVk8s&TIau{P^x*sp&DC|BStH(6oET9G0ju68$KJQ zd;va`Cj$4LRkQlIXaKGSbI;*fGK=H%^P{V%G_x9D+DB1{Q#sqepC#p`cH*myN@*f9 zO?eky_T=nAP`1g@G8=EX%|x-XV>X9fu~zfQ>irAWd7U*`Zz|JsrLQw9#5W`?)ica1 zJxtOka##12)Q=Awc2yLsn5f^|F1=`hm-FG1Cu{H9ja!!3-qq^c$iIBAS=V%*x!S08 zkQqy@HrQ}ML(@`+mm=SKEd3cVl-07q^b}IH8pGtq%K%w*Y0OJ5m8*qY2k_M8&mtEJ zmbtWb>(*GvXBqd|THz7lku5?wq7+Vf9N7~vEmUvd!P}A{ggE(*9Rz08~Jhn3n`ED#y`tDFatWcI)$-|yjG{keWi43W2W$J3#Ybv1tm!H1yaMZIkz=$$jv(XlI#gw9fMKObu|?!GA10FhH!jC!^R zu%8OrTw02w;5y}tS?>?v?nIvXWK~#qki4VR+20`-lXCIk-dvyUF#CZlsFO-cI;H*g zqv=+#Ev^BiTe$#uU@(rjAEHGK z5ao9jX}vRsOT`N2nxY?K`PakHgW1`RrZ>9)w(Byt1}AiV^HisB8*ZKxWX8dyYlF|z zd+L*)!7E%e{bUe;&qQsR8?pRi*-h}TURWTfUk!xH^b7gRV^0mCBO2ELR5TbR4u0KT z=nM0-{S6s}8%z%R{UVi1t^*%r!;C@(=h>PRf}kFe00ZOxSecvsS}@*Z74S0Q=?KaK zeK~BBIwNCjCz!9%y8+h04js68!G85r|4~qhqOG=6M!4wgF zKYoHMcYs-ms4h;JlC$10G|=K5JE|-;WOvl=195a!KO zN#W^p=SQwrfTEJo2ef|Xd=QdDzAOkW^4&KbEdLew3ojAL?VyK10C)( zQNH8Dh(qP;8Sb@=`w-m6@X>viZ9w{Q(zXeoOazPm8P=CqnmnG-=mEsge0t#qcTOZK zd&xRE!-Boc1qqNCtY`useQ3b=`#@|Ig5Hi1xsny7d&O{}UMT~Pf`lK!RKt9@g zYkkPcLtOKaeWe_>3-Y9hU=*?j3dYY&kGRvnY0)YI}e zOu7qFwIom0SnJfU+sG;AZcE?Zcax6i&cYQU8?&Z5ctdqBNltxSsnO;l?1~1ZX!eU0 zl%3%~1IK?U#a4C?T7kW?bdwP8uxF1`ja#d;XUzTZLSGLv3Fw@(H#202PAijU`TjIg zt6@ZZCQ86zW{R9efnWY{%<(7{rcmzH19V{nUrzhWF_Ia?wz-PsA!n#)k2mjck^qOo z`!)sYu}!krls#K;L&5`zBPMyW94~*s%M8q 
zm^T;5z29EScKvv&kP}`nBsvoF6b{2v@+ss8WOozb?tw0__@JXv%E!eql6Ym7f=#zs zAKZ>*v#7bGZznmwzAC^p7dgwNRefS1EqBSYlCjP<^<#2bW97m&Oe#DBZxrw5YdxOX8# z)q)p0;?^957iOV@1!#O67*jW#5ZY-CrM?Ez4eLfsU9%lHp%>cId@ZR)73qZrf+yx= zJxjP2IIO)5|ud zT>#wd%szeW+NW}1A-!^OSuUeNn>Wvtibl$w%H|7%rRcJ|x&$9*$ma}93zf;G28s%X zI0*0v^(X23ooQ%2Jfu-xb18OQDE)Be1bsz9D`&>EazgKWZa*PoAI^2cTCWRqYF_9j z7&QyZRy95!%|9}0uX_62m)CT>&KHbQx0+X0*iZJZhZ4Zx%Vs}ee|~C69deMg z92cMNxgzOQt!BfW^(~k;7l_M{0CzV+pDP9nN}w~v0M@N55aAGs$|Bq-7VuT4y}RLv#rBl=L7yFZ#$+jK!?EHZQM*cyv}tF-_N&~FiPzW%7`xENpBBeyw=4#H1X7l z=bf?rbFWhp~NKNbJ}@*ca1Z;XO^ zXE1|wqwKwdBTW{$SE|5oK9!((@VNJZQPQQZYg4SPMzwS^vRyms@__;r#5r9 zeil!|?9{`W%SDC?`QO|^-#!sD@=H5iOWy^$n%suUQzK?EU_pj)4S&gAawL=|Vpg?E z(NcX_C495%-WUe~{m*{w#r`qQZV$Sei@KvF>hsl(oGC4Rm-XAI>ga)8N-Al)^DkZL zT^%K)>s@^)z`iDUpjaa&gr_yFJK72M#RBFmCF{9KY5hyQGHaT3-|= ztX|chs@00pF7;g(v%+pi%xIIwMFHK6iSdei*Y%4F9)i!BU{uLz(HQICr`V)WIxRX| z{8YZwKDIW*E4$gSKX9d6aucgZ+G*b9)|*C}%=$kJ97aNB*9ySy{pp*tUONMhE&0K5&oQ;k->2r#gFWzZYe6O?{AotNIR)uiRdl02jfr_b#aQ&izb z;CQhwj^(ZscgnU4=ZB_8n@ja6k1gt~+FUeH6Iu0w)dt@_+KqS zbz9R#CgmL_6xh~|x;ad7$QE4(SkmUFAAJ|AJ1;qFILdaF-HYw7Bywd<9Db z`~Eum;lxdwv5cOidZY2u?B1k$9z=caj}nP@T7pVBQ$n>@H^f`qv{|;ML1Eu3i4OcR z{afFTM=e3Ge;xIdH$h@AxF2<%1_ZicP9mQk|Ivz`lDsN)OA5|}Z*_x$=uI0(`b)Wj zo4E`~vS??Fn}Uz*+QPQoD{7N_l;MNMdZnYl-f6Lr)uty_H!cl|OX>cmOoelLm}lV% ziP+M<8#@t(9lsbwy^>J!P&E+I?#X_pJJgzH8c^5kmd(z3#i2Vvr(V}TrTt0Sslw8J z{e)Xi40-fN4A;d=j&Gcq$$RnXgYHnmk{2bMQcqIN*Q`IzRzs6k}_HOft7YK;vclI?V+7UP&fx} z#D_myf_hpTF+OVZK4gYtMd~vTLbSjf+pPYZ@P&;+=^wibINVOy>|_7?m-KK0?BcTf z4Ps}>xKkOWTE0fWF4Gw$`-X~KeV;u2b#$#T?$>@3dFp1`olxLk+V^qDecE5yv~SQ; zLGCU|yLz7p{oXKCJ9j6m!;DWSG_C-%8p_OK;rkd|W=R%!XGIFVpt_u{bw5 zd%9QLAk^JMZto5w;Hx@S2E*Ndq7?PKts8Tl*Tlj>#=;0MMSg>F#_|zUgNN^44WnIA z3gfw(o}`VxX<5=%tRR19MAas@*3#{01(vP*Se3@!{Nk`R&osZKd`dXnMeb2&NdQCR zUZ1A(N+(`P)E>}O@)Jl@fJ$l5!;_04CrqK1tNTS++}99f#$4)W*1ZUfTQuE|tC7Lb zuu$+eZFM4TyY1XEcYDZlBDg8#imH~deUyms0CdCcF-tnep^~X$&O6EU4>#qByTSc8 zJ&W6V#*DzOD|>nAqguiCDX0ED`eurfGv(8oj?YmU4PZ(qJPJh{)ZPZhF?p)TnnE!C243Hm1 
zEE?9VR*7P6Unn(<<$=B6MjV83y`<(WQjHKyk7-4(;ZCDPJ-6&JLnLwIrB4N+*yT}| zUvS<5Rnv>UcXWL_GKVmYWZ$-0*})BajaKK|4{f}_U29PvYCKU-(zmfzEnywaO&Ybb zSM4Uf9hx@NHP`4xL=EQBsirKG(wc5rB6$aL^DXMmnHlecPFR?Zfh0+LlGVSII)ad+h_tc*2D%a``T<7}mtJxJfC!;__KZsvF{aaPm zXf90#LR{QUvMF^F=eix~Cc7{62#CdsUd#?`4ccaO!e+aL!}U+8DJ|{#L)Wo$kjf-C zEHub(RI1mkPB%0cXpK*`(&iZiuE1)k;VKOKc_?mu^~>gozL&?vCqBIY5S0#$I(Mur z=Fn{(JB??YRFVf~3yb?D8nAQMT|41%&wu1&&G|GEd5z6u%>N7^nA8R@H2 z3=A1~{n$P7Rya|UYq`Hv#!RKwE;Y_(FciDy6hf^JmdTa%-MxodNRK(H-cr`XT=?_! zKv}lB&j+tGh;`e%FBr+F4tsK!jKEhmga<utMu*iE1_Tu3*CG{*=5doZM6fInJar?eC#Vu1YqY93jz6V1F-OqMJf-8=Fw<2kV*^9AJwd)#NHUxYh$q&-SjSbAqO=Z$AM*_|h`u zkr^`FQ_CMyy0<$r1$f4gpC52bEiJY6eGc{~3}QQy!ZarG6@YaNNo zb4fL4P)DK?sB~EADyOPAn;sd}m-Qv}%wbk@wOO62tHZgn`VqqFOzc}rvwdLul;oD_ zy4|jkqcy8r?ao#wpN`@GB72ley7&%lmcX<-kSz%tO{#tteP3tvHlj^q%q+OMNcDPx zTc+E$%#7F5)!B?r>%DlMo7E_#&veZ(;+X#6F6?}>btM|EIYqRWl^b#U^B?YbU`3_@it>U5ES;&xG6mYHl3!U&B3~sxMSx{NRb1&WSV#;G%eKuY}Qj}xo z)CM+WOw4e`?fh+qaU5hDT|lai|Ju)hQ`D9QBgfmz>0*IKI-deQgMC)1^_BN>HixLYTEJQ zIiQSOH~VkTL|`?fflLNiRCG$~J=?NU0EpY*Ip7{bdD%w_Uo@KBK;zF_x-GyxHr>?gkJ@2%`w$z5Q+6jb#j%noL=~eo7<6joU*!AiYY)}9o zP7*y(Qq4j`6#iGkNKPujX-IOVx4)o14I%y@M=s)w28<%dmdR1A-VkIP=V<;IyD#%SX<`g^)82njx|Q5J zT&53;2}nyj_ALnwx~^xC_BkXL>n>F0lG1M{k&`|{AmKyvN??(r`8nXpRsi5NU}r!zKV*pIrCq}!{kIRhU_vMM8~0g0wO>zFqGzwha?YC6gccHJ4Hr$V4XuR zQ()%RaYM4h8o?6UGd9z5nQghismk#w9;aOEtrN`NKS~NczqKQXi4rBCeKYR8rbAFf z9A{2|nW>O?nxRR-8F~5dP7LRaE@W6;+NGK#(#*iCbpEU8x>~qPoS7J&89J}ya5y=* z$UAa0#&Dp#b;@RmV>k^ljY5Y4E0n^Io3X-Hr1 ztW&ict{eaWvK49ATCpEHMPShxBtGin^;Ho#5y2&kK8(aQ{A-D!hG%_ai5r}U+D z6L7sSW)!obOeY+o{n0(i1&+!Hd|eH9;bifzgp5FrGaQ&KdvU6DefAyjVnk$@aqZYq zNP{);Oc!?>#Dv}koHDWhu13W*aJ}rgInF%TPRT%ZTf|BvcY?o(rSWa0I)p_P0}-m z*%{M`&!PE9wpfw1Yud|CW6!9FuU2PR@hvgNjvF7q2M_F8t{?YN@I6}Z>|hO;+IDcM z6EGn}R|$?AB?}qia&8szr0H{xCfu^6Rng0%lL?UD3`3i{Jjrzf)7!|>)Q zPk6a>4QI;7^^S=Xkxn(3gTBV5H}M=BV{+2P)DyVkmw_cYKLQ9!dPgVCzV$VVVT7i8 z+H34N88w%kxLY3_=ehe+pX}qUPCD+RST`RX-+ZrILV)X;f~%X4=&*{Q{f}n8g~q;F 
zA@)4;EHB;CtL$4SQl@LZ?+z#5BBxtZtzst2iry6=>=W$N&N7RC07Zx&%1T zH{%vi*V%uaLLuw&WEKoU@-ZfKJuPNo7!!)Db8k@%_>L$B_S^N_3}Bjg@Cq~u+DZtY zki5iZQoPW5c5dQ>M@ig3F9)li^m4Go27QI2NtesIeVia{VL}`*1uaYh$4o_GeoC;S|etB_!IAF%JPuiXz z$j3Q;Fd{8j70^%yU|7Soa&WxFGHVSX3fff5TdbVn%V%8!9}BrPXpP(@$pP&cymbhy zn+cK6&plcr)Ti~(M578;AHt)c5fPPi53L{slgM;fh~=Z}%K$egXJgwo7F}f|DQ_-A z4T-eK__@p_SWO&XM|z4eSOT;Qh7c(^WLJtF4-@HQZlJy0NtU@z2W?A2&^Si$v2}@9 z0dGs~N8%r%rk_IJ`Q9_vy*rtO3?7asmY^7D12W`{e1!D1au1MSp&e!DM2esvsDVt< z^xck7tPbqO;~Kc4m!G)^z3VGus~5PK8S4vf>ctp0`-6@p^? zEEp-gXu<9cB8^wA(bs67zT&g`+VXIZ*0BayD)5=K za0Ud>G+l5*$VS4aS>VnnX>UtFuHLYRB%;_4N2t@5`o_5~HI8P| zqB~hYsI;-U3>VtXH(}dzg>C*@kXX0r_x~)+a7t+ME=P}urBMI33{PQA`U^nZ&$ z`Y42Ql8^Kxhv?elF9szR-t0hwYaN&}mkkoiq_NaVCm4a{#8le)nN%t+v{gwtM zN(qu)rxgMl$v$wr2>Y>_EL*}F-Mg0#-@8KqX0pDmV9C=)%4!aDRr&iRRPYe9qTByH zgXN3>q41B6FxN2t&b;!y^a@J^o7opjGF}O~ejuU0j;kcg=8#_%4}6W?F1USWTYNP1ysf&b%T>swOBXod#Jg|d)b)m@rNRIsinuX;x z#-S-)cUdaf?lnGQyUxQSGG3RVhHZ;b3{2tO-YIc%GQgrv*A$|nj$sUiVMdR^LTzE? z1*B&hu-XW7`jq~LXW)~7=Yaq$z7uej$w)$~=TD$XuJr-Dy* zr%~N)4UNwaa${ve`*cs60#b7M%M6eO3e~#eM`uvS8u^`6I;U!W);_*bR=ClV;43}jH5zWYhH;njCxNuy|ZbIF1$D1 zr7<$3;iGCHqZ;<3k$2`T9$toKM3cb@xY^#*a24f@(T~SA?=?PdJv*d<*q)~-79_u~w_Mcr>o>b5EyUS5#APEN9?+^RG7oWgoj z@QHH4nea=a@4CkoeJ3I&Yp?k#=xdD|M$*g89w#>5IhUXRwd+{6&;KFs&Esm$-@oyL zF=jM028F0$W$;xXmGQw$$WiE>ql5cUoa-t+2@z*r z=LsyPwXQt*`Q-!NhRxm`Z`ols)<&;woxA+c#E0bP-xXXM8heRBVr-DGdgb&`oQPww z6UW3aG`6uIR>k_x*22=551l7cLlXQnpZw3^Fm0}Jiq~(0LS0(Bhkb7<|3hl9N+)SN zq54E|Y@m3z78i`%ISJ=?+asm~rx$FLAoUO~IB*do;(Hgha1ndxMYSJ0RQ%4{zsnRH zr-?K@oW1KDO79 zcEa%y@e7|h1VHi2t&nCiu%rhkwk_VPU&i5HY_nYzt*wpa)+roe-j2>!zmI&|9;3s+iu-htF9fKq8HTL80@(<6UL3rm|jy}e+x#ze+(P5|o52AN>1bn{Lz z$!k;pu*R|UJjvWvjL-jC5N*CjXbH9hhRjN}hT2QtYyKKxocZ1rxl1yxu<5+@6)P0~ zQ^VUNE*V{-Csuw+^TxA~k%nBBIi&B>9$JY^tlK+&#U~BVY;`UGkyzgkpr^~-RJzph zFaOPli%r1*;)83W(F1EqL?D=@2#`b|11Fd#h-LhL9QqFOowAPlJN`tkYnSzR)_3>aIr_@M)WxgR z-{t*Dp|=$SlZmt#CDI~Rmh{ogiDXOSCxf+D#WW1`&2}Bc)-`Y=M_cI#A78dKXKMAv 
z0oGn23|l?}8M^{+bTi_g{Q1hES#_Fqo@ri!yAlHAFmO`qNYxer>q`>i24)7TwNRyd zk3CtI6eoO$)3JOgYwk;}lGM2g&jL3NS$6rl^BeyW=H6TO)>=7D3Frk+dt8Ao-(VTe zpc6wDgT5mRCq)(!7jRnP;F}6do;K1N*F$Hc%VplQ8&_l~`micz@^uuv$&I$^_RjS$ z`S#$#MZ*2iM0;dCj&3gypSee~yRTImd#k>9bq2axg`8cG!uewfj(>cX)JC8N1m>Te zy>sp2xQa$BeJFC!v*k!ma`WIoE6LD6XseqR+cm%T*4eP;$p(@~0UnWq-b2P;O29TP7=+i0w;TNqqxGo9cC2Tkl-gaEaQyg~yw( zh`CDqVG4b-Et_(9hyUU>q|D%*8Ew-NIRN>CWyj+FB9 zrm7L2r{9pu;hA#p@AlVpxbPz#q~L**OQ)y7L&Q$MSc4j8>!+SxO#|qOzd?0>%pqsd znd_T_0eC`GV~c4)t9YkU-{L>{Zh93YH}&`uya-INz5ne}RPX3Sj-%iDNdW{MO3RFR zNuC>`H*IQ;|CTelrOZAG=m!?H!PONX1~Y0$Y@C{8YtNpSez^<^EcKvCRLTJirNRO`JNR0ul_n`unzO;3OtR1He1ie8M|}!%0b51URa{C(d;dXlg@$ zzCB3ZQ#>fsYCM+HMatP%*mX&eNEkB-Bmpte;M8s@V4z z7O}de{_Yxx8t`4<5v};CWhm8wQ>w3?t^w8U=oy5dSgORw3c<153CnsIHEnPRKco%n z80zzSBqFLpO<3Vi>b~aZAtBf*smfMkC)|A|a42&X20hOstOczk90XEor zy{K@Q3HU!;x8fIY97<*WNac2YaGXG+a9o&)B^VSZ#edbnj@t2UG4l^sj&~_|&1l6a zRQB{xyM$6xkc4rI?hdZQVyUMx;E+pqz<3{INjC|4%C|DC8Z=<$I#~k_->QvJCCezP z1n}ybLofWY=aA-w&;VOBd02qdA7iP|OO%!GJM8k50bG&~-hJag=?wGdEt@`F0H;%E zB_H}|>CcB!OT~W=ef#7 zz8>|k5p}Wd*dek!%~daD*4O~xx&~bgvBDv>k{vp6M}G00X&Z zzRA#+I~^zm4xKlZe;+V?`tZ&q&tK&-+&@p&-fBmn+&a}sZiYVYy(YMF*vt_BsvWuQ z=`~pyO=V;4%^7@)M6!}OY0i>W;hNdWmht9S02u2?+61`9@SU%kQ*H3vIyWvxGtqu( zUtD2IfvbxFOKD)byFjeNLWTvnt-yt-b-zQT5%=7k_@tc=N;f?1JAIEg?@CzU_iY)y z9*G@-7jGpO0*R~t&r%5ey(NHyj}d~LF8n{+`e^C?8Sy`(bo5b_k$uxval`tw&GRIO z49Fyx&)%}-!4}mJ_E4;OLc4I5qQYQb3yLp({bL8Je=ih^hzg{$jQD`v{Lir+W1ij~ zZg_YrTfwoz6_~-TZ+*a0Yczw_U?aL;XV{3G+Wo|jy3o4Xy>K1NAk<~hoBqR!7P&XYz-9%+`ULi99tSS zCb;lg`rfovc72+~p(1tR-GeHLzUS4oGO`99_b+o8&B|HE8JR~AY8(guj&5PA+ zQRmFpZQ;9@O3jsYEQ&tHZr<*Y)h9S5WNUp{anPPpU85~|54YCeU`I*Ew_F%0N$Ltb z-d;Rl%IAE%*Yl}#%k~Y6OBDyUa#`hi_Cb#JIYW^NLx~AeqHd8F7j+m{Oy8h6_&{j%drb4# z%-xm+{tP|Ly;JtUK^RRwS{CQW(-s|3QFguvOh~9TO;KW}>_2Y*Dl2ll*>Rhp4EZ6Nh=mZW ztvKOmA$(6B&o8)4kYi{ks;|hS4Ud4!g?Hg07d!&x6?1YMruI2WGd~lUjDI#RWm{g0 z$_L22ol7wx0bMP*(ltjO3|FJYm{2KxlDyqo3!1r0d4rawcX9pnFPjh%lf}GMR*9t8HCii8De(Jfq236V2iA 
z6OLc?DXIRFcG~dgw6I=WE5dw9QVGH2f7J+1G>JP^dE2$MAb5Q2wHrfB=MgSW^Rggn zAqHvGIlaMGw~;3@hB_jg$G5MeowR|{?Bwe}e_T9BD@m0(B{?6`>_r$w;Hr-Zv$laH zCxfQllMjNr6F*_v2Js-|n-Bp$PCa;dm%YA-<27pXMg^ngIChW@%qCL_mA$GJ*O(^m zUqzj{ec63ktNUt=D{NpgXvA&trBqPOC!r@5FpMAM6b7!&WbBHVQeJVW!{NsTGEu_| zzE^v6Ep<#12DX7xpK%c=E?cDUYe5Sr1JUx{`k{IgsYA&Jv*#D6V6hDTK?OUmJ=6?K z`Kk{QZ9k5!^L5_ymzaw^v;<-DCeAIowH`R?e}ZppKG}K*eQ%@Ibo{!Net;-5d62v# z)NXWWBq}wd_ z*TcISP@zY@ASwqSh$$8#u0o=`;Rr{e&+^r?@QPqb_j@HFKLG1(Wb2%Gqt*^^p zfh}=(m2R*j{-0C#mN;C&q!}!Mmr!sSlG6kDq2w!6{lqnT6c8v1C|su+wgSGG zDXhqx;^{jUtv3F3bnT&6nU(gse>mWEZY%gpiwW$oc&^8CD{}_P7@-sK%yy2NLq@wu z(&@+w9Nil6i?5=kk3aHGJ>Y4IU=;>a!#w0GUi=EMut-&c^6RM8(LWFAa4J@Os7>HR z)~D1BCT^O!?&j;hz^4ZjIaUTAdduQmq+DZkuwyWv4t(fjt$nu2`|LR;wou)LZao0Y zyBT5pq(&f)3v{vcIdG7TUKqG=0ZLl{`J5TaV0#NB>`}d1__m&UO${#=d9x~!chHy? zi5u^o;>fQ;wYdmAKFJ}1zgc+X*#=ukwVQZ?V^rb68BIIZ zS?6@Uw2PFD@Pn3}kXbwFZaNpdtuvk%|bFB6* zW6i?ddUEb{`3%ADZ43YEsj^pGJEo2fsY{1D#zqg3<&G*dNimA>UA6zN zo1D@c@TNOe5 zL->EwTpKwzPYnta-rphI1y{+8y=_6+>SY|G4n3aHRI3#3Z9S&} zhj#`dtWv8_tHW(IdCKyM=vv#BRALD6!>*D95Ib{7vjr2l0;xYFrvm3WaC%W)LZP@P zEc7a{Xu;%1UQ1U@VKJu*8dvD;{_I=dDS#1vk$qKM^izy#@2g<6rSSm49wJi!*k}0J zl$RD!Yd<{Ow40Q_(GlQqBXrDRjDxD+$DcrvxfSoWkFmsH3(+hoGc`r$(8%4;>YNx8 z3y4{7b$)@&wlVcd!3RGv+^kC%#cuYt9dea*8LFz?f@O-gMOTcsqrmD;gYSTm0|+fa z%0$2`&aD6{*H0)Cp%|^Ov%r*=@}pVGkNp+dy~&bT0FfChoVu~nAGfp~Q^dIga=ZsSH@#u&={gxj_%V(We(7v>u z%SEf~xDS`jmlt1e{d`^9q{zfOYr>RMivk+iUV`#p}Cmb5CB)Jg5| z7jCh6F;pMonj;<|%7%3*4;$v+>0EP25kjSDG@A$Wv=b&cFu^w&{SXDos|yAu#k=uE)pecx#5 zJDBJTIkZ;P`&P9Vv~XJ!wilhO1FE?flE7AC0Jau6^Ekw+EuWEO4 zpZRqi?~wm`i+5gBkYY@DYTmhXXT9^3TlQERRxBC{GjEXZsSYxNp8R>3X-=h8$>Z9N zl)j?V`MQTLfc@k@mHEA6$DO0x{u|I;D|FU*O5BPqMZXhZ@IYyRgcwe`Wx&h6A6G`- zZciB6m0)S56%I{S_Nf_a?8X)64r@==Z2K_V>x{}+|F8etjiS*kun5YUGPe&gK24Uz z#!dDKj)8lcOX5uBWRkl2rU^OREm-xofWJPtnQt66%7>$na&5W?%{p$z42n;|H-c3THCKc4oUN;imW z=D|6<*(HSga--vFMmLjkx%=gbSrlMEYSMbFZq?bW@HEJZ#0*w^Afm#5j<5Sk;=|)1 z`^iUnN2-VZCB{5!=jf`9D}0i7Ve8(V`~o=V?x`%LgHIi|5HkV}0p&^L;*T;CR`?zp 
zNL5c)tD0)rk@b2Hby{fi-cP(2*P&j8wP^3o2k#;tO( z!W}uyYrsVQfLGj(jW#&44c`^PmZd&ij9_x=FE8wD{A-%Ex#OWq?|4`=ZYeIN_rZz` zfd8PO;zqQ^Q>GTpkn-}H?k>a}P|;rNp$Xm_$i4jkuq}tm6!bPfug~!uE8R>P8W>}_ zG7ckRhA8JjfA^3v|99dLg(G`N;QyEV@)KakyesyPAO&w3slmx0Tk&t!?P|=z_UK_9yPq3h#-)1Ek^(32aPU4OVp7#j9>(`0)-=Fir+*tv-=h>8P=E;5inDc2&oT#%DI3l*maiF~)(UM7MOkrbq!6wY1jcqm|z;=%t0 zwhYQ5eLbBC25FI(jjzw<+f5ctrl-?!cVl2w(00v-7i-@MJ=KZL4mY=!s`Kgm zt?Ry{Lro}YGA!l0{l|tEW{$e=O*p2lFLb<7{JDkqK?pVGpEg)fEZ1wnT>=qudKzeh z-KlTLmNw9|8y=JaBv=RZ3d<_f00$=#c2W=_u+ zJ2s%7vdriptks=;VXq2WcGkSi%g%@H#QpJL1%Z-HpFqi|R=XB*)UjuQ+&M~v760ck z$!)#!OaIXw!@uwFhT*qM6+uO%7skdwe%rAA1g9spBXjq0n>pbN?D)2&PeX zmI}`D8tt4AXLus}dPdtnNHf~8=8G!=3yRu|`{ZJtg&7ndvoUMv;8o~y^EP_qEs?h& zOvS0=f@Jhf-JC_8m2pdYIfs>xb|QCr9xsCQa}tQrj;3wWf#+R_Gn>Jxks zyoC#o8MV-iC!GmJitr$^bGK$CjO2#H{hbk0+z95XuOlTqbudIE+%i#x!H&$v)Ic*2 zH386FOEg0%zyc@IYD+<(7>8j5*8rW#{?xd$l&#VF7f+d!@K?~dnj5jM=ScQKMmB&M z$NeGAr~ZsIa(S>Cln{RjF3sXL6r#6B(MM&ORVDAT$|`Y5NwQ9lfwyl7o?UET?J8qeDp@bp{Jq$;^B59g=CeJrqlOxMW9+s3I>q9pjP#fmm=@F zq8NkPD~_R3H$+qiWCnAwhh;px6<~u@64ria)vIe)>DX6GOLyGK5VH=j;w*(8_R8p= z%yWldDBpL?1w77wsB@AbWt^`!5nMjL`iQmoRB zcFq3fa#dC^)xNWdW*;vB@chZo0YBraJxMkEEmi-aWyt6caao#otSe`w^{t;K4?1>613A*hjrKgU~%BJNS|^!7v_N8 zYw+m;u|^4LvMv2hB^Mo#5YjBRmLwzy)Tw|^uaI}_eIiuYjg~PNF&0M9qTv#Ke5gf2 z_>SJ4E)hHy<8Ru3AsWZP6^5#bt;EHaX8 zU1EFs#88F#P;mzjw5iW*FODzQU|{$k*IKw6@$V#>CO_RmRP4BLv{o1~EI<~5b|zd< z^sbg=z8|I~q4|}sidA)*NoeY=^EwZ=^aQ-s-NS!;r*cZ(jDQA%sNYT=oZ6>#WT+#~ zIs{IyxWe+J`1cUV?z;XuaO1tx%;)=pq*j_rxmVN{kNZ;qqr){u5NABXibyO0H6u=g zPL#QMn7LZQ+ziaKEVFm#3zSGwqP|sQ*|Xbf@0sH1?%+rJj?!;B*-j47XD?;5F69>& z&S>G!=~o{95T?kW2ypX2>QB}&PigCy4@t7{#mDu~zV6;cs)9=*NA`Wp(E7aeS<-(I z;>g>!uKURvM0_#tm{N6cu;Lv9w(CT_nD_AP>@VIiiErf;X)BooIXw6XC_V!Jt@3=$ z(+drY3^i_fCVqN+v~ZybhT@vhfP0FioFV3KoRX(2;pQLy?0&a9UpTTV&QG7ZfN#12 zbCvcAeO`#!`$>JzXl3EKx;3#&HAA)?Ier=7parb^fgo??ETU_m&dMCO>0 zsRNHMZhw}=8HgBG2%@4$gGkJ10^C}$_j{_`@q|B-Uge8x`sFiH))pR4f}gjw((gh2 zo0H1i_Q8PZT4@%`eC^iF8#HFfJfy-2X$%Y3tw!X~Rl=EyaOY&5cr6^tN=kJoM4YKT>A={U2+Gbsc70Az-w#fL-`r$cWJ 
z%#2PchO`AE{lz^W?M3b(uwm+2`f(iXTpERlNvc&@ku++v|3xg`s+YlVBg1OGo-A`@G?rJxLR*)*npzHCmnC<}nd z&9yzHyFcEuFzY>sRSWklHsDS?l{iH(Fe<7~yRlMRLgVBQmf~taD0O4zf4mhczyG{; z)l5j@_z*s?d5W+NgkaDH2nJ9xb$}qfh%|B;C^VSJHlxo`Dh^m^m*#S?wa!;zdLl-k z^fztoXL-PO{MNg%6Tu=}R&;gYZkyP%rN8?8Sv0Q1>SZ*_lf!s}cEamfvgckKl=T_U1cP6UpWYK+rND_#g&iw0260(=xDkX%Mb?V;7$+pLry+E_}sQy?cPLN)Y?<~@DsVU0<5w^SXAc6 zWe-|pf?^uKh41pmJe&Lh{2KISN^Kg;(!$hUFQ!j-cQEq*$n(Kb?N1dfS$A;fwRyjU zq$*ZBr zDSihQdv%^L|Hrt-_OVh((S=TES{CY<&R$NYQhHc{9B3VG%A1PX>ko|;} zGz%eay6$$R)iD-zBLerbTAkGal_CA9?%`R4D2J6!iVnhGHFUITvH!`EL9WTsfdXvd zvte#=^jE)Tyy2W>VBEj@GLDOD&QiHnp@?{uOX)A$o@(W(6m)kquqy)FjN*Pe-Y7L| zJYg+!1;wF3qO>&;vZW6X#Lt@wI!du;A>c)C-|=^$5mkpNDj*HQO1L#1#Si6K=ZMNr z)2TXtjP3NDYwQQdJKMc_$J}*)ApN^4Q8O8V9CxSio?E=>)Xi9plr2{vS@QqCkStMR7t}cLUV!|s?13fu ziK3??I!zQ?E7txr|I#6hO-WL2I$8=$z^`@Q9mXf1_LrZnTNd!`ji_)B!^kivE4^ zXR+t&FZEFHVQ>++HtVJZVXb@pS+;++ ziKjeYeXVY<<71aY(n}clF90bremCnfUs~GH0|W zZag!TSm(&BlU8eswI~YcBH8~#D#wH= zoa3z*75#uVAwNO(ttV<19KfU?Cu?Q!!N(iD)=-2pfes{g0F7~Yx|ouI+4bWHLAa^* z*Y)wzV*jR(4Jv+Q>OR=mnO2d6I!FZpKtQb?8T#fUG-gvZUf+DWK;G3WyA6k%wIbV6 z4v${_APQ|y@U8TTD1)<I>vUs%zn zDUOX?kzg4GCVEKpLHtDZ($m|*)eyQ|W5>wX0?OV8W1`yjr0S21Uw}$d&pRArKz0z5 zkgL!`1o9~yIXk)AP+^Ht!&c&-zll7(6zVkIuwh{{?-f7p*7FSCVI@vP#@$Q8Saa>4 zwwybISmU>BK=fVtU5w`&N}|;upFrp1QMMh@-W#a$Nykrq?TPj25ebHk+@0)D=zr1N zGbb{Ej}YKZp9lwZyJH>WHj~@-PX_>Ue|fk79Lijw1%xzPP%a$niR(z z1lKzO_Cw$bWwSsXy^>8&$eQy4eW->Hj4(RDCO;BdKbQtO96ZueE~Z-Ok47UC_Ed)&qyi7e%hARt2V}AnP+>!DF}+R@-I=*) zGXv7^$D(kg6fA0U;}O*|-sA7rv=rZ*)3~t*m;9;!hEtYbFJkT|jk%v27q#=olN3p_ zM$F1i*|sFKt};0Ts)RocOuhqK_^Z09nC4%h%oX-pPot$L;s^D7N{LqF010dRS?4 z0pl#96y_ChMlg+wbTo$2Hokdy8ce$B*K?RxEj@^jridNh=^ow8;c92z_-`0r%$lDr zG;R+yFU8Sg2vq%vA_fpUKoK2~d)1zy)|0&7jDJC34M$4GAt12V6w=)dXBL0mjm&wg{)pB7@;xBwhXLeSh1wZC&|T>B@-T zL_*Cj#SuxmSC2srYX~I}<6OUefUpdNKA-9@PtBg2ztC>Sc#B;sNEx?fa75x$Mn1Du zyZ-UYe(u*<9@UgSQY_cKqZ$=FVRVqPQ7T9W98VO3@eui20pp*v9MY@6X){f!-5iD{ z4R?SvGI{7|9RE}oP9hbaxfj3e)z#h-WQ)VvfU-h9b-2*0M&2hyG^3oQOjV}C)hNO; 
zT|}m@lYe^Qyp$BP8V8Q?a*!oU>0A1P)S|n?Z)5%j#G}*uU`2k>>iyVv{*cp8qhO-) zByb!yQ)aMl4`p&7TwyjzboWuGO}DU`1Gbg(6ct8&sIZC2r27nPg?JN&hDTli#4}GT zu=)T){ck>}QY^~vctCTxjFzRr0p=?s^lA<@2-?DZNG1r}1FmN}>+>OU!V@% z#4wJ0yoLGr%uGl;Fu*vxEda)}*-mDK>(TBz9;bkRCE)S7s}0_0j~#U$Vpyt?bzqC$ z6~-d6o|Dnvw(`JfCY@cWfwBY?^Pd4{ASK86_7>bIn)H1Zo+~(o*>50@Ix?=+n)?|F zVa-*?u1V0M6TT@gJOT1%=)gn+EgLt4?+|hDRBK@KmVtYDK4o(-H@y$Bg8lVEWAjL6 zLZK|s@ZH*Rc5EnT%ya&lgGf}NOVG2&Z1Lbmo^o{8Ut4}X4|}VOPLkP0a0;{qd~s(~ z{^&1hP8lob3EZV(0!dVioapz*%N_0(W9|&|xq_o%a6@^DmfpWlq92S-$T5xs#Hu_7JW-dpC`5*kU|0KS;?F(%m&Z4grN>oO+fPlOZ zL?k)rJwnAW8~%I=#z?vX-9GaId&m;-m!h;~`3h2!WqwUh7haU$-kkV8P*XKXL7vXA zLjU{?SWH{z(%&kTeU|94oZbXN9CQr3NFCW2p@EP`NN3Sm!l$W}U^eHy?+7m>Zw-Y-hO~gatE0f6c=Xn8S zR9;gGTC-WZO*4gdhiTaindRNQyv9{0+k}0+Ny9i`)zNLE$p>8qZHbB5GyGEFO3#LS zBP~f!uU-P|=Qg)oZsr82Z1wABK+|nOJ!N|{COY`IRbZScNs0!bvxdy({g1V>f5-s@ z!_;v5Lup9{LTi==(smpHe(eeM#>ey3;&%EhA*a>i!n>lXrP+-`tL#Se2YTOp_?Ak} zHgAqlqk-jM*zW1=am^`p6}``+w-N&)ZSwNZ52vTHi~*Dd9g5QFUdycGV398_u*Er? zuFB|{>tX-h!R+Gdb19--`|I~mR1@o5;@7t|KOM4=BtR=rbL~GdPi&K;;Y|TlAr)LZW<{P zEDeTqh*@WL$vGLQ;qnpO{euvuYPs0#w;n~36ws|T<1;I6AD}dBP0^3bd(-uLL>Z8h0)}S`Y}PaWpdc%e%@V zhA22#zvojjBz@9nG z8F}QzR_3a+)Ox0i{i--|=*&dPFn#L6f2+Y*LDx~Zu5##lODWesP_z%s!D5X8@}^MZ zF)R?-9xZOPURC0&1iw}MkcRmn60C{Ez{ZG3Hv|1NK*Q0++j zQA%ykqg;vcDs%5qcfOmD{xRqI>5ch{?vrPZ@6%r#a&96YDBe&XU=OtwXme2O3Nxc% zK=l|w2|V!{WFna2L~&wEuO8kw{_RC0)#nAJ`dA1|EXgL-kHs{W&s>rLP+n*}c57_% zm$b&(F17J8;%DmU;Qrc8Ll?O8|zwRSAE=NIt zR_OjsU-WD5r}jTR=|$$EIH z&(eWB{=}O>Qr!S1A0a9QmoxLsREcOtAewBv&)NUrizb&|3Q>yO@DM|c^62!5bNU6q z)@7zbWQCURyR1(qYgM$vaCfML^Ku+_cnExwSa`gE1|bCA`G_g|t%oz2_0{i0JRvDY zh>9BDs`vkbuS>~V8mRc?HKyqQVjP^9A66^-qB^1HcDlf=hzlAorG4=F6udVvj%ZHm z^4+&?TF_JBarcz)=Z=E@fO)*;*<*HKkW%r$rKQbX4^JA?C0LpLYlig!ZgAt!zbB^u zdxZ53yO&cmOc6NrDZ~09)y)-B&oTUW;invH^d-I(r3&PzKy$ho`3XjMU1W3RcU52R zRcjsn@XXHkUO`1;4{{jlNolfsm%LQRA1|9|stAZOi8ur}maj$rY4?KgSNI0zk)K>| zbL}GgvnoLg{L|T~m|!?hanPsvS?D3F(wh$cd#c~|oq{E_Ekrp0H?D1o!a{-Vek(4h 
zu4nIp3|iLCV#k=-KC4&O6@bojJsb0H#y>FZwzl^?-XrR2V}HLYPPuAk%?C%B zVFWwh`aH+IJ1o~f{q(}$f29#&5mS(zBEPXWas7WMzjKz}8wkQb7ddjf`56QUaF~C3 z-D=7$?D^MOCjUwE%t_|C0w{gi%YRip-_Q;JQPw~jBpzeJ2SsVFv+!T0nf%u@>Tzk^ zWIg#04UgatavDO{70 z!5>KL9`y>W@)g{jt?01vJJA74{n@t7T3#OT3pVxq#Xt5#VpE#5vHQ0jsuwtGv|M;m z46tqAh5FsHd2k2es&(9`I4rBe@~0H3WDsW&Wt}ou+}CBF?Z>0t<2b+9ceFL=mQP=)ookU|VslBN zfmMomL&wo7$EFAMF*5n^Li23WKa+_}~o7&dqCL(BZ)8E_QeQxQD;`*#jqxoew0 zoav~!c*^HUOX`^2x#6fY&bj~o%-ab4KlXRA;T}l zk$yLSHNL>Fc?DObovstcm=4}j4Om$*1i}IY4K2@K)%H;8-;YOP!(iku? zCg^A~b}FIo$R#d%rP8fY_YZbVNo={Yyw!7h`+;9-3mdw#_U%RGjKGf%nl8N!G_{Xc2BeV=7b}$?aUmD z0|2E-sO{ztqgXP^(~pMdm~Yw(Y%W>U#G16GImSWqc4YBFkE+w&3LY|Jo%Z@T(f>cW zGJxN!J1c8LjQDwEqgNWeRI$CU5H)Q+?5Lv&0X|4$_6BXv4*UHHhzn{NVdH6~=zMBBjE;m~78)h@1bD$0;_U{zI#a7v+JOC&2i6 z!z8GiWRR*X=}fCtiLGm}Lc+f#RDik*y$Ju`cZkvoXGO;m$jvLZPqUY^5#!?2|I?y4 zctKB6e|jhIo3!$H}t+>`*hRTxpag}kDy1*V~2h4a!6D->nPOk z`aFB&RF1>sy{<;J&a&ci9al5OZ@msAEA80+OE2G@RvbRy(q*iCtSJ4@*66+8*jvk# zYr&+gY^K(GHQD{^m=;&r#5&7ZOEa~D)?;m&{5d1h&w}R`k3=_VF69J0AlU<<`Tjsg zDba6Cu!_FCO-$kz<_?lEn3)X)=Iwm@DOdnC?tv>0cFC2n(+Cfhj z0bVn4$$eY1+B}NezzZ$J@8HC~d&*bOYfp}gb5pG7GT9>06r&2m9vW4b+#H&TEEV0RTA56C1Wuq zSq1ZY)H1|?v}%^xb8NM*zrWF186)ie{iGH=#ebO0GvT`buS;>DLBe%DfR3H|ynt#D zofDQR{+0}sKqBRM%_{+VzDP=g^PVmc-+2B2wm07Q4*fZRDhQIenxn76aeQJmi&3@# z=&_2qio7PXWXm6T@ofwhSbGgV5(Dj#lFQuFWr0NDicOT=e*1@ECgxhRNmpN ze0Qgbi(=I1@jX6za{g20OqO!Wg{y+(kCu6=@)YnHJ_-!54oY6O$^LhVs&bRgQ?2Xx z9Nc;;k}WHSH`+1#KDUfB7UbrWAk2J>ts!2&_K77 zhj=K)l@AyE&lu^2^dBO89GpQdZU!jYeO#5q+tUP>sfHH2C~BY4Mr=HQ63RV;XY@C6 zcR@%7;ca(L;SNfRA%%tBBiIBlk5v_9A0$uioJgS+$E$y@l6TO%re|NUZdG%UL1T|Hzup`q7#rzpZSKAD1UrUr z1!qY$(PH}izXMsx2Qz30BGO}Y1eUy~&j79sC&tbFH+!QE`YLmEIpR(U_r4Q!x?F=} zdjDV3e}c43zO>P_HktNb`8{524`$8?V1P2#nxJKt?+xJvh&Xn!S)Y*|Txq<)$x>f; z)yJ7LtjL|ciIN$;=kF)9OslYeIv;KISXIXCwNWjHA5@?hSv3cA_?lEi7k9r>eM5?@fVeu`lTSIoUA zHm|X`<7oE2IWxa`@ZHrl-7bBDdVx*rngmT}u6c5(sLKfuV3Ki1F=5S^&^MIrms6Z} z$;7MTX!Braen{f$Ko$MLO{G#NU0-GL8a*y8aL8&2t_jgB+~c4JaHC5-Q)=kES*=TA 
zrTOU|*TCNA`GSvwLMmeqja6paRY>Z|_!N%z#Tg72Umh&&HdjAf+dpLZKKmq8EvlPR zmi|%|pTM~M5efC{qGxK2;vRc7ycdKoUAoBC_*_PhZ|Pj)3@?Ft$LR|w3uBMNSQAB@ zA!{vS3fB-qq=EJ;t?}1d<#!dfsY|UC-Vr%8bkb}62Lr=lgLbb6J3ZERM@mMA6y_|l zt&&*ME6~N6cXZc*)Ww(lSNZABMF7=$_C?oSCXSkkeOnK9_ydAK)Tkc_)DtU*TvA0Rf_%hct<;oNI0`6BW?MRQ-Xm=V(tvi#_APGW^Cg0-|o6_aC-sL=dMcX zY4QpCh@E?ElNcE(0^stJxON#BlOTy=nh_(YG4<{#gr=-T*YOS0b&TTRRy^Uz^?DsR zh$iIYZ%-pV^txo(>%bko3-7%x@TQRl1KmPV-g*kaEh!(e9zN(@C0)>;(3C@cB$Hx% zO~a>TztX1^OHU$F?h;p5>)uTB2vNRjZwJ#iJnWh61N+;r9y5Dk(0%;9!n7>`<1C4OX-fb-r-$uuI1yDYWb0;wZM00%<`nAC74{g z7V{;$m9Cb-kED}gb!A$$@viUa@4WRSh8hm4BEp4&plL#lgr3fBc$#=p3w+b`uz8rP zSA*#vNNY_f?DDcx-JY5-trqw|^DxC~`14iZv~!xJfm7hcO`t9OVIp0u0I31|lPhC) zQYs3aK74!9$K;P~oXI!gAAkFYtmWTXl{6cR;z`1`+7g@SGG@GMFHQDR^(@T3kgz|9 zl4VpqGJxPyIz=&m??KnTC~x;U&iMA9c(vN;c(+>q#Pic*KIgas?7rXAG3nLnu0+Q( z%E1=&f2|1Mviz>?3W*3%AUWu1YWj@bSR3D$d#3qIXqG`Y2donIAe%q*U(Z~BHARy z+R@8gpcEHdWZL1fk~$dHl%*d80ACocSLnK9NuXkz1(2Z7Qiclb+UIaHzTCJ6EIgY&>aPD_f}IP87FpZ(rla(V2b+U$5b zj3;fkoIarK_bfPh#u6Pt@k`nzY)cD^Ixtl#f+^jIV+k|2rF=bxzh(x8c+kA|?}QZO zNcchHe@l|U2gr)^S`AjXj#Nl4w?VqL^(X}RK`CC)$Dn!1JL!2hC{900%U~-0& zKLEOPrK(5a2_N#gr=iyC9x_)=P(DZDTPyU^C20~Bi)H4K(1h8!&VBP%?pN}klPdSj z72va_lbkDR%mJg9obGv?`e)3K;+NZtiH!5VVb~;#>C7h33gEE-NyqpPR~72>8z}Y0 z#|+0(v=Ef@`P9@QlYM7Noj5`gXihO@&)^=W{vx08z5$yKN%*}dr+ts6rn0rQH8T*! zT&KFjr7wd##q}iY@t@--Hsy&zUMo#c$9chXxj>-_jY8{eziBB}oYgrJczu82+?eg( zKkst8o}Bdxn!k!;1_#j-ItRM9V*H6b?&is7C~INNIko`OnQN}r#s`#W?utL3--w<) zhH8;lU2?fN&)mn8Jrih(%Z?IF%E+YZwNZsnFiDuxbakpI0Wi2t?1c{YJw>06pziRpV! 
zm%4vl_RE=N83q-`U{E_jwZ2v9T@|;JuiB0hDD#meU+1gh-I`dt!YGR*yXWQFZeq9A>}DKq6y*7S>W|@V@@O!yOpb{rPb6|4DglV z3R%IOPtCBjZxn6GabpRNg1(ncR$$_xtrqqtoU?|d0Edc z&*2f7GeI)|4Z~FpK1VSYyhR0k)9C9}^^`@nWVccvKKre$wp1t!zPCfap@J#|?+$S@ z+JgOmPn~uR`y?(Ng+)x8rApzcxR=f(91dx%gnsb?vUe!*OUug}53FU}wtPn^z!8b(_2`%p-=3fp+G>=EFvu>qP^p5+ z-b~Jgq?wxk912kz?nXHF(*+OQ7KofdxUDj2?3D4qdKZg$veBLKqd&{!MkmQ1$Q%h>H)y2}13Fv~Q0 zNyQcrvpzSd|Cn)ae;xTrY0iwlq&%lS#d7*a5mORNNkZ$wKCjZd&ZJmHQkrA8kgzcq z#4!!ZOY3^*Kq|USG4Ao>$6E$^E*AubSVoU2OdNiQ=fk(?u}7h{K))k(pfMn9T+sPO zqx4_^61uSOYb30^jA>V^c+V8@?TCFh)m(etQ}oZ2QBJAmx86lEjzw-{R996de2XEF zo}dKpLE;>5<~kxNQ8AgPN0P)VHhYlk#L?mNPg+k$8PoZXP#`j|vCg#>!_817FyUrU zjb6GHl}_YG>F!-mTUrNEbKB|1t;mq@lh|Gus|c7aQ=FpR&soN0N$0rOE5;N-M!OJX z+%A37iZMUe0l!RfVIKD56P~m=5+6>_h$5(-{O&g}oQIjdN+rJdK-!A%cQntVrPb~g zR9xNZmsjjFgSyWauDOR;4?eeM1-)c@%{yAb?a=iPtS|_e<*4N>_ptfgnxZqzjQP;z zJW~oc=J{GeMnPoI@MYS*98u)oS_*R9jYF(DmqR zP%28%UO2qatH)c+RT05vbvy?B8~xR^=+Nu_v516#^c;UgaynLO_pf04xwlcC2&Q0Q z2cUd&aBLZ^NB?ZC-$_h-QFm3QxJlCtMJboQ7SXQtJkbM?yL(|*Z`D>;18n&pzHlWLKp}o#Qo%BEB2mT&N4fm5abL!|F3%Y@WG%m z5M&~*u{1miURhS4r5|3G+Ca$|;{e}_OIZ?On<{2T=)Sq;F0olz*`oaUG`psUo4iN; zD^B*UTlq3cKc}L6hx6!_%L9X=&m)YQD(zCUN|M^!{STXt_9cxs5$)#b?O%6{hEneu zA)0n**m>2l+Iw*R=XKZx4edkJ%@jKGo<{`Ws`zHefu2+)$2*;?tF+`J61i{u`!_|qIFd}<`OgvwXuRr zzr*-jViwa+KsGQPB@omH^;F1bSFYyq=Ga$xopy23=sl6%dbVhXre@iKO((^3hP&jN zZ=rf*>fd*fG;wH*XYK&VoSc_cSCLVggiSRLwCo3_lS#i!MIIdva$hai&AYR{uh!L- z>~s#-oEMVzRG;5F>-En=PQA6RW0z4^P|xgg9HstSw*5$%vjUT7y_x%zIBhddFej`S;{|fSHeg&2k-Y)IR=J5gR@w)(#Y&DCmQ{aJY#g(XUMN zmq`5`w1TAlN1e+4HX6-H5&EUdtw0bN2r5?cTZY_kp)QrCAmQ)eTl#E`pj+>` zGGS@<_#AhX7Mm-7cOLBVy2$Ufb=lWcJgB^P26D*br z&BpAjmB2SV)KFOZ2VjBI1DN$A4BZQf!Ph?uB|7C7VxOe5 z&rkpOa^C)dX3vEOtS=cI-h~>tYyWhvFk0e4dj1Zc9O(duJNUq zE!QG>Jr)>yawXjJb=b93XLGrp2E&F|q57l^^5(Z}Ha=(Lb=$GtLu0%-j!)(h>J8WRm{p!?x@*=1VYl)M@-WdVrF1>i^kg-lxj<~5wm`O9R+SD)MZcPRaz zK$FeBZJ+QukS2l9%@7_Qeum0z*FRdh%rpsvuPy~KC;&s=Z~n1dq;{M`wXl?uQfcd@ zA*}EI_9_${bb&^TvV{gF_Xx@#zqx(S-gP8o;o>a@XFExznU~inxr`H=6`uG 
zp@nze${*vjd`D9Ka%P#cC7vK+m!-4`s7|S$*Qofu3!@OI%8~|byk@{w$J#4EW$Hda zFA<{0P{f5GxFkXpOf+^4B|UJnu8g~dJyrkocLL}JKYfE+Hcjmt9!=M+yA*frVy%sH zcy#o+ps~ik*Y;YfgnoMod=u+ZK-ttjjav)ob%MibjInsuhZm*y{Mj(zz_}AU4<(_# z5adUWD-e8w zyy2?N*yFG>^`R+SGvGGCoWO2_wlu_@ApiAlJ*iMT(&U(K z#X_omo!ae!JXNJRKacnBxMFU}>=IR(T!7*M=_hFZ$Axti!yX9b%jVOui_}x^%kmhU z@$e{(4&iMqI(u4pL@I7thu!`p^BUo}77K27@xvz-24{CJlFbjyXuEcceYE0F+3pH< z+v?jQDX${i-%LJvbWDARGk2hLZney4_PorF*QJw`>ZMVs^<(H_AO0Af3!{(6Dz@WS zUJ=1;y2QHOIGer$UFo2N9JV%cD0EO7Q`DAB z8^$9V+V?KQ0)vIrc?~OvMNL>O$WuIHfcggNYQhB?-^}z+xLB+f8Xc+ zgPCUL{{8Oz{vE!D>$<+b>Yd`>veXG-2z?YcQxZAJyd84UL`rwAu5`6}S0^j;>HQBLJuJ zsJKLEYdnu{73vJ+c{_xY5>ua3Ij24xVfc@Oc8~d0P`cqhbe&MLuKoQv``*5WTB0N$ z)WLBtqEhJ{`I9*(mt${gdC~&e2VlaEy9G1*Rl5gQsbaullWSQ4g#;>tqEJ`LFo_C0>syXI zzTGrtC)1tuRFx0e)kHTYch8}W zN^JXNVvpNOZ_I~gwN|(!OWaYi`zck-7J3RAGl~;hvZcaEnbz%yH=XM$wzPCqWt2^~ zNXs-_5b5XAkW(&x6k#!GMWsh(g= z>n}-fdaF9DE*D-fJ1hu!WW9Xt+B^-dt@5_2fXsEJzwsM7s^aIpjIUfCnZT%_~YV^lZreb=`|LmYfpzCm6Y`1O*Ru)Rl#o#POb{M+kT2o?%2>vuSx#PleL zt7@~Aw)~UMDf+1!X|Ai0*d5Om<@$VmlCT6GH}oPxfl-nm7z1PCXh2JkBcr6S?z18Qw;fQ&`)F4eEa zToXI5tiL;#J{7Toeq-~7xqRITXd{7s&v8xDx876{}Ft&a+oPq z2g68%+d%*KJgjV_`K_eMfepUUMS3>|;S{ul;{zx)^Fw<KvyP6=@1Em<-c z@N+w6_opR~ZRI3MxKePJAfnDy1w)$(4bER6tfK52+Kl|X6a9m0e4 zZrFpp$m1IU%1!!l^l)Rqr~S=Jb0whvRO}W8%stk=&LN>IiQ{FB+kH4;nSzF?`PY@V zdnAMgdUTK8h=#}kcUhW3^hO>3{OhZOk1D60u`%lvEyTzz(d3#QL+sNIj|#^}uKkx= z|Gm{`O=a`Wjl<6Ry6&yd;or$|dYF-A#)%a>w*@5OUXWNA?h6jzncS?a#~D+R-gX_L zAvfTcQ)NiraB1i;JMCDv=l5KetT3OrNj6&Np){Y9yxov3M^tf?#a^8*V#L~?6>80X%$;i7G>CM>H z`Kt1Shk{p}yy!w;U318V_JJ@Zjb~A0?%b2P_Zk9{koaL3jsvTh*u{If_a32(`J^B! 
zR3p*?N}VSS{3T=;ME6Mzk;siZ|6G4|>44q+4z9D-da|hC>n(f$wxRU-){M{N4F#tu z1bn$fT~62@#Ig$zSLHbw`8{05_COMSAnFl%<=}*5Ln>BAsRF2+3b1G&-Y_O@!*|7f zH_YC-Xz%PJmF*AP&Ulr{Y~3o$HjnHZ{V3;11aEV%j85|Z#xz_*b^b!mmvGYBO z5)js>soa+!x>NMN#$Zir46wM)x!)s1oVOhA_4D6WZf8aT|d=X z0{f{H{8>2{W0K^~0uZY&K-4o#7DPO3oD0XO7}n(=8_~o>$z8}bzT?=}-y@L&Fm97m z-RoPJ66+%PyevCdzZ%4Mczk;omr)e1S70T-NYU^t71P+=Lq$;R>MOml|2^2?R2psx z-5MRgw;AirKcm0*Ai~ifm?L%v*TqZ4@Cy4G13kaDy7I{mV|}tGmwjg^&BvOS=v&_s zNu)gDaw)m8?E7#(u%URQax7FM;cDvDvbK&yM9DXAVlFyYnQVU7qjw$4Yi+(Wo3Dnn zIV=&KKm?0|i3ytO4)!?}xWp9|u{k>V z91M2=rqe!yT+CrxEsnW2qs>6LQ++~#sP-HRG<|S*xlqU7wpYeRple^EumG`oC>k2^+O_*;{Fm%gp2l^=pI^+s`3$y9r5kTBOk$ z^5&}6V%VJ*^IflroIy$*3Uzoev-24WOepEamd365853}N$m!N^BjMp=dupTYY51|? zCav~CyCV#o?+aavj!c^hh?Bpd%(kY$FEHD#Ocp&wxL$6U66$7_9SZL~u~8;I>7{Ye zuEWzNQ1WE6q5of~u=-{MbVEr$M5spEPNPxiQru^H#F>zB8s*3Tp}4Z>(%2Do>wXX@%$D~$_RY0Kr~I~tJ&Rrc zyu+I|ZQ5kts&vg6>%NZaZQiJ9veAob@e0)d4xnh$`-lC&$A}~ch#xEn9b;xfrC#J~ zN9PA^ozgrpAny7Fv$5BX4jp9sdX3wlKN@mXo0GiZ*sjBA=Z73M@!MnOUt@|5JGGzx z{SY1h5zDwP-w31+#E5LoM&o$=DqOApOqQX9g)238IPH@-Jvd`woG~Xdsp_9MeBC+EC+f zx1lSF+~1|*MOY}jVb~;Mt!LJ4W>xb2ip{($%md}hf&T z!iK3#v#J~|^SbLF(UhlRyFfj@8cFR?OkIdx*TZpLNDv}Msf6Q;vUFWP=z38&UuyAE zSwYp@dou#aysWu=K3_G&4Ou5td{unAp#M|M{yEZ?Fpwb=)#-fhb*u}yHoW8EAj(@S zXZ})3uJhHP^UK^NCQosV$@^1}wb@A2p%^)Y zslJF&or>7mOuqiffVWHpAYIC8O-v=+zr>N?O7P^;zvr4xQrSgeBUxLb@>TYQj%^Bb)D`X`SN_apYRoNEJKCd{r;Znn0zOeR#Hc*Z0VjOmLbkrt`<5AzWc zMAb>uR;Y$j=L9!FT*;+2uWGw z{7DVS%JpJ22*Mj5RDUMa4_Mb(eB9@DdES9#_APF1Io=hzdI`(nCB!F=EvT(lD?J%+ zaVquJMBAoyZbf$z{BCwS^RxHb>NzJhBF-jFuDf`q#lfi?R=K{O@PWkgL5nErQLDeA z%}OD1nBrfWv7!Fo4XMFNh)(v-GvxR2`2Bbd6kHEb+mKlrkkD1i`Om>kDhq8 zsL*pp=0m@TRXI5^QJ!OKGWLGoc)&B~%qgQ2ld#t6tVYq&l1mXQ94EHNy+v}ifC`{! 
zlLzsC*^90{hXqMV3n00#E?ASfBWl1GF5f}XxJF*S_u<8~w$=BR-6@H&(MUgd+c9v* zi*GWF;vT+Gn4U_`$e3c{>v;aNg`D)b4wPZvyKDOhAD_0~WT)jBXEwXDvA8bmpdG}$bY_zrjRe^_ccs{F z*iyOSL7yHYn0Zrp4FSJ+U>Tm4hC2{wgbI#A$1-V_>9m;bDAFm-&aPg6qH1+>Abg{G zZwNUsNoqsD9W>|QWivlGmk;Jk@reO^ku*tCe7c6WC`WS{`W|;^=LsfMTlHxvgDsG2VN0i z9xKg&6IBlP!;9^h?^&rb(C_`o$#?Qwc!?z4_7nf%Ihsn!oma<})vCA!Q$|45mt8Lq z8J)DZVYOe}1pN?FO~9X%XSIs?*Oi$q_C_u50#snl$C^JCTo0`I#=DVfn@aM=vY<%; z_xh`P;L6s6kv|CfKSSE#H5PB<-GYWugt5T}i^D3C&BwKLC(V1M;f6gm1SvIgyS9-g zn6}NKeX<}VqZh#9Q2gCVVqqyWc&b$32Gkh*AMS8#6G|ZMjf#qrb4hENz`ROd^F}va zCg!yCPwf878ue)pN!t<~R#XD~A1UE7Me@{{&rh+ zMv^qybpP;b%ub4xk*cz)aW)l)*uh_-B!>gzt}{sw^xzV~2bQLw=Mz+VEzkM#RmDlF zq<>e(Ih+z5O4mbYjSHv1dkBf#ZUV|af$I76=GgmFe)Rdnn^UoDM`{mFan1^+xR1%k z2m7?m#nESV1f{_%M#Z`v+1Rmv^5Y6&(FegUtj%=l2x@BVWE_+M5&pLq{+C8WWtcU+>p#V8{-Jf9KJ6GW#99{ba~Gb zkAooM195p$ip)bkpzOFi$ViNM%s`#tA#29@_$DRBXuS;oye`58;3W#`pz(@~Nrad) z4?F=Q2$@KjkyZq*e*X|zN|KVaU%zY~OEIUYVFYGi3hBaBdIVJe=I^o7!v{c(mJn$3 zO%$4o+lxCpiZ?yYAI|IB+DytWRB4o8LJHWf#vt&Pq{9WUU(Dud7z?Oe$Pp~sV*M)U z0#I!gDXPFgn=N-jG-P=`0SW_8_>i_5+Fnh=4=vW9&IqIAub^xOZwAr*XjomHXDND} zg`H(-uhtaghh_tkMyO_#wV%UggNE|6SNQ-Awg<9K#=zGDm^$oi8^lfvP+70 zT&maO)bz$Y!LLv^`Op3@XG;RxUof@1$am64y-`rXeii^Sz{Ht#NuHGyNv*W(4M9cP z`wC^~B221`-oXxVF(P)mdw)B0;0D9?;hh6O*4rVSD&)}KMDPu4t|yH^hb2+JIs_+h zg(Rm5cydwJkfI`kc7320XWmU)U(-e5JE6v z#~Vjg4+>BOO=^`(SU5&UNFtFKGX|56U3jB%FYa-hMoLiw_Y%Nc5i%}e48D^0<=joc z&XkT@k5CdJzvnUzzQ{7J3#-!O6eX9tM!q3QpDe-RO6jqos0={ZU=%m|G-UXs>+KUy zHBj9kVmUW6eJXVId_AM9#@8I>|ib*lL12i;+MlsM&0 zKekVpzu9&ZxBTEsQLLe2)z^%5*_Jichh7eBYl7;-lH+WKu*>cdT z4@KJD6joJ#`|Cg3ZmM5@{Z;kqKNm_j;y+K6?!y0{p*wfn+u6?HHP;81ZfT(_Uu3a( Ke(GHJpnn05Azp+4 literal 0 HcmV?d00001 diff --git a/media/images/gemm-hierarchy-with-epilogue.png b/media/images/gemm-hierarchy-with-epilogue.png new file mode 100644 index 0000000000000000000000000000000000000000..dde58aa8e5a62fbb1c7acfb015f884f324607936 GIT binary patch literal 256654 zcmeFZcUTkI_dh=BTiKPiyRL#Dpi2NNAXQ4hT~{#DRC-52kgkD%B$Oz-xGMrW5$WKH 
zfDoE=LUmCvv``E+fJkTof`TE2`n_0apU?CB{{DTx?|Gh(VKOuK-gD1A=XGA^%=~>p zUz>Ng&~6BVcv0tmzXU=1nGm##`@3D>NHb5M6Zra%*Ka7J@4)8&ox@+?|L?D#GxvfZ z z=w|$6Ur=TrCZO&^rwWbR{Rr{)&0DgJ3;ET5{8HPc_^0JTq3@`*2Vm2kHJ)6j^dH*gU7#ZQMZWF){P(Pge6gcJA(wv#Kb}>z<=6-S8AJ zDXFOOr*PtN8WGxU{#DKc%6|O?dQ36)2z7O&U$z#ReG=W#^b;v(pvT+P;BrlJ1K8cP z>AyKDvVH)8l0G-%iE2J7vK```_kr1u!q7D1DykDt|-gB7fc+SILtdpj|NYg~d zp1Bk5;Najf{!r^)gjju6aX$f3I#m>hZaq7F-0F>pYo`XRHoch8w%_{3%2H6|)rNj+ z)#MwR%O5Ch+l@7XW&uo>TJmhY$omM<_?KdSjm zo=8}~Vs);`ydIF&lL6$up$>qQq5~>yz;Otf%fqO z#}e9}4GtR0J2lhX`sOJqhC(;!uOCxQ|Tl<+R z9+brCk$zl~OhRjkiq}XA)#a+&m?xTTDZC}j8BP^lO+8N$A5qx%N)={`@*B=@BFWWzaUkEgiaSX8O%@-I;-ad z40j_sU*6>ZU(eoIu!_=lOwR~TUB0ANxttl(dg1Nw+ywtz@BjR-7hr@|v7ymbTUhgm z$ge*-A8kU<64D2WwSFg+AiX-wie%-n<87q!JUe7*Z) z4}ci(HuDj>e7+Zw^5s=%ULRBA0*4rgDx;D+q83%l59-*H-Ejy@g1TAJuCX0eLSTIV z+xByZ^aj+r^^6LK|3aGo0=NId(`WS;#@E+|KBEG){0sR=%9(!r--u#KPE^~W{OUXe zzhsPM$qtA#o{y@$pt^ni02)PoC)Mxc}VX;L_FdfH|&()3?Zha zLBNQ7&p~7%*pYmj{r9PC>vaHtcRoRxJTQfVIVh8d;bZo*_61ycYyq5k9_<0_-Np?+ z6tV#($mYM&(VG3+px;W>UMr|-WZA7ZuhLt~c@{pdpT|Kc3lk3LpwAoJg&voi_^+%t zxO&GH?Zvm^-)>!f{QB}v&jywl>A839y-H43(NpGNVjP^QDa)n{W{cvwVK#m7kg~)1TArl(aP6c0$jhy{{8!HNZ#Y!6CGI#FCK_%OP*&djOR5-s|_{EQ`9lD zBTX_<9^<$DP?a`YWris*}@|6Ln8U)_;qxk944hzN>^ zR3v-rw0o2eKdQ6L>X>P5ZLK?BtT29yOSGhj>Js4R=H}Mi*qML+7`ZsDe;}5ga{EMB z+0tAuTTfS)=0`O)%{tQ8_O#Ho)24siyQA+$oe1|n85?yQQW}bAk~QyE)@M`XH#>4Q z#gg>)-iGl0vuLH-H=5-9YG+h7YKAN3z;bkR5rSW;RE2DcYf(H_V3F0Xme5RV+iJhB z(>5w=^iJmanFYgXeknh6Y?&!cYa#sdZ3e%7za8D=1oErJbFfuU%yw=$t>v$cOvHtj zCkSOpbC|vZ6|V2aZbDQ=lD8!VP3sA5z3jvW$v^8ZL@^VwLAJ1VTB%wror1fx)nj^F zZ;ZgbTWR~3{g>W&1;G!bi?`ew3i$W>x`z%Pvqy>Qp$|%lP5a4_Q&7!1GuQb^TQ0$C z*sZ%X{IA|VumoyEs`MSgcMmEt7Q1=TLQtgadN5J#Cf_b-0B0J^T!LjJRJvGOo!Xcen~0NW`3*QAW)fc(Jd*!nnQv z^9r90mhRll>o@t4^m_QvmTp(S72Yt^>eDz|7-;4x@*RD2@ey(pN(s^39xT1(qQ_Uh zYQkGqz$tPch-si`mX8#Ng~)lxx9u>PK93k|5Ducw{0JcO zfhuzoM)Wc(^WHu4#T0oUnX~&&2sQau z&U$a9sK7O;ts70m7BNBpzeR2H?d~WI<1LmOg6w|Sp!Z`2Rq=FM+mBlZ9sGLT6r7$; 
zh@{W#_fSIYpZ=A5%bBM!8$EGHEJ9OvriI7X7L}K`pt9v$h_!)CXtCsi<6-)gVO6SE zrt<$hb>rffJ-821ie+%BQOxB-WJ}oXHT|!3OZBxCq+ANN|kykh*m^HSDbDT`Oh1;)J=+=Vw!A3J(Nup03? z#VSPn2bDADzSI79@9XDreFI|huZ}x7DUqAY%kl@NM`r^^0^})^8sB$cRZi;m``mO# z%^m~wxCJJzU<_Dmo*TXH$|s8lG;+p-PS08=(%&O=V5|F2i<4;ajIHW=`qyvTiXEFg z5ZC8Bkp1Nz6!lr}*1h;ER+07BDL$yV^Jw4an62U*i}<=O&G_wW@6|`2jN5Cy1HvEj zcy)L0SyO0N=LeKcl7k)`L4Ca>OAbakN}9j#VS8BJ5;@I~^7UnH zJ=gB9V*=J|2OrysV3@&xs0`fbFJ zaQ}B(?vFoIv#!)FF4^a1uEEnCuunJ;R($$q2EeOju zud|%>x8ejSBzEq}`s(0m`2$fWF^)o`!e|mG6Ui9!imlE8udzWuHLpAglZg%KaL)EI0q3iZv_Oz*~CtD}a_`z124hi;exaV1%A?zu|@lK$z;rNFR3kU-x($8CO zokiRr^yGYpr=t zT%jEW*IJ+W&k&Nx+%lPmN%gs%{r!&&g60X`t~sc)rrU(byTK5sQ^3jV<^MAM=$V?UsG5@$sK3xgPnyJSteeF|wCKpSYfucS~~c zhX*=eH|kz37_CRdA^a-8udp*KW0EF{#+l6if5t?Eoj<&Ll{~OKSDe(+-k$IHajezz zc;tCvC{rO?UxP!Rb1&*1X#{Mj`bF=kUyE>XTfB-F-*M7wI|h>}ErOeTj)Eyc%d-n2 z0Uuip_F6f&9JeaT9ct5&>GOVZd#7iwQ&PbE@o67l-w4p6XdmT_&N0*M$;s)dxsM;c zGtKi;ldBd?b8M_9p4($_xP^c!YisKzjiTb>;$-j$0vkf1XB(9p3lMt*d8 zSyW!vJ<7qK7?3GJF87<&N&peX+0M;U9E&pD_Zf%cSuNcvN(l+}q1oGbszrQhE(K2a zkxL)jOIMb0Gd-Q6#@>7F?w>Gwf4z0DMf1#X5$c-rFro^DJ_iG$NhfU5iQu2HKjpD; zrcK5Cj+9H|v?n;8)DuNrzvw(V&Z%n93!QBHj(gwn*H2M|@ad^3WaXGNFWR+r=Ptui ztH2NbRZDYaBruX{rC?Wro)THPiqnMfvu9m9a?B+2wcluh?}s<(hV8Ab)8Q@GJ1&X2 zqVhUza7YVpz(;M^@D|?|Rm+S|ALdHsOl14r?2~Q*oCbm64$B~0Z{EkKzz?BbUi1eG z`>gHFNF%Ymv#H7fd1FE?JV;bxdZJjIgAtq^nAfN0{GSsJKtTrt6BM#_m3ryS5qEd@y2ys%l}QGpr)W$%*qJjkoPscrAp2G=^dXzoXB(xs zvlXs2YpyI#!8Akf2rZxzHb&A>>~V$#J4$pk-m5vBP4l80>h-KzN&t#5x1-lBU(bMW zEW3DaZZ4lto?2G6z&h)@-%Y^Q|AQpzxa$|7^=;f|9(@CS7X2e}5tP5B^iPm#b1{Q| zRyAf7@9W!s?NM{HUf!#H31>S6AJs;PS$gjjvyv5FHMgmdA5DcrV_U=_$2rr3Z#Ssxc%=LKelRFzRUO|gcl zDfj)IrQ{H#j}fjmOe{`Y)B?W4#L-{bPLGX!x|~o&kOBllicZu>-*h1R4Z@4DB7Ij8 zG-N!CE*;jmW5mlDM0^v$=Wja)`SqX$fwF{g1h+@^2Mw|Y6N8AN(8N@Q7}iUJ#o}k< zRw`yi>u)T?KU7~f(pW5(8XZc~Ec|VCoJ(L8W}}B@mC>7qI?WyjKXOBsOI#uF($gO#OX9yoZw zLC+p6nZRO_rAM`JA@TkF{WKiKI%e(`tC<&O0y&zfG2R$Fe; z9t}^ErerD8Vs`NNrVY?y6_GK|2XOM(kI%|i#HB`RlKegQSpIweP0`%hQuSZBvi2EA 
zV1e8;iuLEFFYVt(k|rw(cda|=Rq^2uZGgH3MTthW*di1Tqb(zgxa*P zQLCcO$e&J$jfS&1$4I+QJh5*aJD@T3I5wutV<8|BH^M=P`#k=SO-A|L*>o$Wj5hi* z>`FLrpI)qpPKFn+!r=&kSL@UBdE!p%Y{0`c1J4lFOSDb`Jp(bVJA8Jum*B0OE^xX= zx65UAaX76%O4zY4Ry#_E1kG3~gvDDJvXuUDF+;vcCK`ETr7GEx%}Y~u(E zA5@zqvZ_LV&JZOjQF?;xCrtekt$kA%eCh^dQ_~)eur-hqCmRqJtAx4@BvjLUmGc}s9=o7OYjDDq);HWNX< zY{;J(6EHVE-1O`}VQr&+FNV?S&qT8v_24psx`k9{f&&KbouKQZ(5`Thf!q!IhRx_R zcd%yzPRVRktSL$2F+WN>2d>uDd}43~09y|eWdzF9E=0~0H@ zqJGy9DnRQM({| z{y6JlgO6263D#bHmG_9;melJ?SA}U&fMUs!kfIO33s?`dO7=vD~}~W z1;^r!ehyl z>jtX}w(h-bKK4z%FMN?Y#8K_u0jozkyr?za-0(HtZ}1SQzOq;kN_oo1gsLHL-<82v z&Zb$6`W^horE~Qr#-`jU|Nd#P&w7WT13_{sW+(T)eb&h--ag@!wv-Uc%@`+`7Ef_5O5E{Y5jE-1*QlM z3r{$#oFLz#(gt~%Ys=}gHptSy#`bttF8Zoz;>~y6tB5&wfk{Ajn2R;|jRqtGL_CC? z_A?DvpShOA_0P@(uV$wiK==*=bmZP@Z1L}p+6NL(+G~xPR>c~6?~QeGgOw$K$##Ur zI$j5!B#$9XmH!cdc;&La2oJfG?#d~Wn^An>t)fCX6x(`qsgV(Xtn06^*RK1*kq*-g z&JIi-msGbTnTF`d-Ly*akFI_t4?AieS^79OoKRcEi7mf!1^gnFc+)=-=i%3C98%Gm ztUWrG%r8UQ)pA<7|C8of(`!cvQU*sz+nbEf&nzKCP)d4JgNe2ZDL+DIezk-fV0EB- zs@~FF}pbm9B&lR_y>Zbl=1V4EXpMV z%eX<&d9wt%6g4@KOzmB`ge!2OAcEq7*Apg~Q8u>~vUlBB{F?#z zVj6ga*A1{;U!vYj8jzG6vhWU$>3m)svY`HovLQSz)@Oc1i0OeWgA}aZ|JH^_Cg2ga zei$2ftR=_q%Lw`rzC7>Uryn>BhCJNdgrM8-{90Uo4OJ!)V>YzRL{y!ZQzm=Hwta_x z;9?FZNpSoUXm5tQZBjpMbK>yI=5erf=7p2%8 zh?N+Ji*T?o?eBk*-KRp)p!54-kDMC=(Fjiiv<0l`v4s(^L%6QTzYx!p1jGIEB}d4# zKsk`^IuR+btLh1=0mvB{6~A@$^S9m8bqH~q*mZ)uM4in^oeK?E(Hok8PG9biaySpW8{+6r=K&QE zlR&pbjVO}@d8xi7X<0N|zK{$q!sLUe@~Dd8^kkq-4!;b3N1&SoVo_(k)fo85Lz}d3b{`U%j%m33`082R?AY z`+K|J&F|C6Pjog2P$BkPkOM&>Bt3$^W~4xJkR(_kfn5)^pcbJ@W27qKEwQB>f`!&8vmb>Mv1 zi0LCGT(WuU@%7M1@XifYmC&`r>uaB9=hXPh* zI%8N$Icr&3b>JRi#R$phuELgC!&fnR+K5soNCgt)aYqH-r;D5w*rpYqK(I=vulin{ zR?YUh0JjT$n%idMV!}gMUtX>o)aIhk`I%B6??uc}ncsSnd?n4J>En_gw?p&uj&LvG zKs>!)sPqJ3@hC8+0=2IV@;^_nF!L7ceHmg@A|}3mv~1Z8rpFp-^aCa*695LgKYCAU z;qdP!W9eC_%GSrlL8sxE^hMnE)A@_$@Cn$zZ0wh{X{c^lPa@%;95+B;Ix2%jel}Cr zfSIzheuOA}hJ7O^L0+Z~r3x~gi!^kQp8=ulfxZhs6v{z@Wb8A2A1zqm631pFhSy^k 
z#M8Bn>%jfFInBiq8j?*T3^zmxbUiZzexv!>#blfEM9rYUL>#kpyD+avoM)xqT$g=> zK#pm)RgBtnd49E)c$-rCy%mp(-ap#CJR-rlgJ7r0iLNh-d)L>1fHx+Ke0MJqxGt#r z2W9NXu_woP_7Y(A2M*?%Y|Th|F-ogWmLDp(-HKxB*2NAIXDzwD`R$g(;F zcbJ@>&hyaPP-=k%7&wkKyhGTM8~?z8GZQNBL5l|P24%~spUy)M#|q@EOjI@3H>*X+ zN=r|TPTvTVb5hZGpPImUDw7+#-4PWm%Jwfth2VAxxYq^ z_qrGR74`Y#ul_9iGJq~F9ys~o7NXafl9o1iJpyC~uOSPyaIk{~t0>+fFu3dT7`}!8%vGzNa1F}Uk zyISkjd|Q- z2tk>IZ~Zn%fd#(b20bnU!SVb*gRaG3jRH3Dv6HpUIRJYo5}YJS)^;c);us-5z8+7B zSM6TWlam3t+HCjhIfG1z8sxIbp51Q}=(UWC6gp#S5kX|o`;+K%h$sm^fBW5=UHLjz z5G<)7SlJ?1Zo;!J4hEk7B9L7)_Bi=ZYVspfNk4biSomNfcAk%2P`H=6!%Wi zgwSGu=Gs~qmD^g+68|K7Ohk?W z`pO>cHb}fT9#MwqMv^CmN-x33;Nv|4lI%W8eu$8H^Li?Y zUV;!aK-;ZexM8c#rP_F}9obzBqRy;DiVoTJT*BjX~9!x z=6B%2-1;)Xqancnd>9gR^=2AWyK@xF^=_<+KXRdATh`Pl)xHeT;6hhIp^e}oMCe3$ z(;T8NxNWnbLVj5H;4P%N^E1I9Ep@<&M{dQk?u$ zg`--Z!Dia`HQyB@$P6L2XN-gX(IE}5iSjIN7-U+cf(?Oz*sz6!Y)FH&$Rbhw*2ttp zdc9D$rI0Nb^!P#i}ZNg|@PQU`v&rBnJR?6_WMtOLDlg&9N$po{or8 z%5Hu~!eiE{9oYii^K#;m0@8uyNsUhs zny8u6aFSxW091YMgFozP8iCbEUB$m!LixBjq;EUC3frRQ8@4Ibzxo3Fi;OCBvW8zG z*b=43LYz0rO3-25V1$G#OK_Z7i{prui{cC6wVl9fg@hy2OejvBWRIBogDv&D=2m4z z>*%oMdwk7Ql5-7SGTcueH*h+dT$F+s04`;Sxat0~@Or7BplUWMPcl-#T2l!?GYC4| z0YD_8TMgxzFsXt9a~X$_dU|q)#;}IK@TDF7JF6d;lz~`2=x8z$1s2_0U%J0^r90kJ z_^aN0`T7>)L68E78r$Ku9QxTu8)fBPklr7`j zOag-w>6(z!P%K2{hVcI`dcn>pW)?wBiTnWNC{cO{#m1ILL;((}BTcOOLV-Hj7#QLk zsJUl}12$3%j}Tlba=Sm~BZ$hW2RF4|1H#bH2h8~QNZzB+$vKBfKpW5K8WVod62xl4y|U z1(Q)WbG#g#az%34C5~uC{+3Y-zWXZKYOElVgiO|67{x%)QIFEA9?Q(>W(w7GMK#~i zI(BJpAsIo=FecgvO`!mbAd+7GSQffxwt^#1Hpby>TNRah7{=CTO&=?Ym{nAW7i!~b zCCByi=4@Zi^xWcK{9v1wl|_-^Rf|wBcsjPb`u+0~FzJ>oADj6=zJPRb*k2ux4l9X( z#(B7Dh1gx2slWYARKjDNQQOxBfir++YmCXaIlvsR3)4HY6FNhC_h*FDhkxq;vO7a1 z!n*o-<_RGYQ;Tu7VvS~N`IDc|ND?9uGJ)UBcuypc>MsEJ?la*I(PM%FW~X@ zk?nHI#)dTG#8C08pDJXkA?~i#Ds^pu8x<; zn40RVFgjZJrv6uLZEe4IFaLIh9hzblt`wyrp{~^&_Gi(`oIC{n)CF>&!H7@9XU?lSGlq z%5;dPIUd4_E=Cj(31IKKR}6yHP+CzD0E)gnu-@=q(Bi6K)g-zVPJDXa(c1dKs8z&H z++Uh308d6c7>Ci8QFEE1EG=9QoVov@&o@>2{LQm_J*Ja+Uc7+Yr|CCboP~{ntG6^ z)jx+G2bs!p(Hu 
zMbFNfRVvoK`Z2>*b~8S0(Oi>j^(R%ar}l?%+s@%FTAc~j{quUC6Dov+B_ zZ&|9ZjCkkRn0tps`OOUC02f@{0Nk-3$Ts9jS=`8-sosI%;Z-(W?TzI&(4IsNQFfZ{yyT_s^dY zKTbSLZx79ePTRVIv4X%=5#~I*dl53vQZA*B?aoX>5@@jwRKK2=xOo1O>mlY}2wP$zz=Udm zzLxzdAS)1ox|%74sz>d6v#JKzqrtBHPv=IzR9Af!4F1Oyt2G3U2WuSb(jT<=p&drp z*M*a7_+?zQBp|_O3xv$UhQjH3dLf zF?G=BEg+vqcqa3cT?%GB6} zf1#KLT-A!jsMmyp=A-)e^d{6k&&|9f{_=XYlB@%A`4fRNy;6A`g&vN~pG{=PcX`e| zOXhPoIRTx6t>Hyq)?)fcO&S4V>*g)BNT zVh?e3+6_>0DX|N6MwK0#A_PIXHSjbxCK!35c=9*^ZPoXwy}fauJS0ijbJ4wjWtnj4 zXkb7}VcIwT z_w#~$oR~oH&RpVNlh!@q&IdP!R2f{`AgyPhD5>0kzTH+IE+0N2<*>5EW-8-qcvspp zE%a84QubPJ!$+w}0xqdm(oY)}>wL|OdRn$W1=1>sM2#3l1TMW3sOn;ZdKi6JS+Z0U zv~ss2f!@3gBn7>V^Icu&sZuJ+0Ne#dIV{(HsWn>d7JeVv1s1WLakc#`DFDoC*AG!C zGuZ7iM+J;l>yh5Bana~!qiXK6FSup&OG8{!*%2APBifC3v>Pk- zEpgN-)9C|76$b2yPqK=JR2B}jkH0$hHh6kPY!tY{nVw!q*4;LvgO9>5Mqs^=z`MU@ zSF$Vmm^UJDM%L}o50V(KT?Thvgi^;JP@Aba;p9Ht%|)BSzUWNKi_bz|{gd0a_$zBs z_`pzLKM3&gCkh-pzNG;1?gJo&m!a)j2JvlW{jB+p%>-9CwVV39J$+a8m7AcrZn01=P6fv#V`G`?SKr&is2 zz09FcK(a@`3mL?f%g4^jkQ{%fH}h)YGW3&vlYsWsY4Su&5ht|h9mZ=s259?R0#z6DnGN3BY48%0d@v#!fL!u!gX6 z`72IpQ{MT_DxD&`73Cm$N>PpA#3Bp15+cDXU}JDB}{OI7A9 z46IbTg95kDQ^{(e%4eJ*a+%ngAM|x*{l(y>Mx+|@;@#gEQ$N($KQyM=XCQ_Ay8!!8 z-S&Q*(OGn`*a_2nVllSZ^Znw7ia3k)(o1}b$FezfIp1TO4%w&I-ItuH2321%SDg4p z7gkUD&&CZfg2mK}W+_#<1Ott$FK8f)Q>EYM0%aU_u|C;>g)g4{J7}LL)&z(ROXef0 zITb@i+2?U72x1_+aWmtJ+<80Jhm+k=#sgWEih~;_4ycUMwWiE9YeRT`W2hEX+~pLn zEc=?vdcC=Lr)VhLRc-9?j<6X>R!_~CKO>x|Bz>RbfI+eTWN;ZWc2?KFPi8FhYQ3vn zQfD<}h-b(cFBp^vK%qmo8Ksud*2?8bPhI95HIN!Z<@da@DIaJjyh`zY*&h94S~*i# z@ph-o^h`s}*uUIW{n=&Ayl$Gv^vyC5f(d6?m(@HJ-ky6K`FwuVWr0yib>^mLXUt_} zCtQ+&OcERF)GGrpZ{IJ*fA71^PASp2FMqx{nl?mSIjLUd#VSNS`XeHEt=1W7-G}7QyBvo09?$P>IWiZ#JGt1-K*giL?`zF?W4e}JxQuX5IX`{_! 
zvq_ryn)At0($Q)&ug|D{Y~k{uKe$ui(bt#O^#fF^nuWDw{mocuD&dCi0i5w*@MN=n z2A7exv1h!U6y!KmO6AIkGZogRGc7Bw++4QgU7p$$fV(BHaWBp>nZ)T;CC_6{i5Dl=n5h({&ftSRF4@}lg*)RB+d z?LNdSs@IzPR?N!RI@n$e$e+K-dKI_1%ByQP(Q&PAy$j8cZMkLKM4|Zp`@LAwja61C zLyx)xbeSSZ2ykI#F=#FkCM#NOoz^sx5_}{DEet3EfA`|KM43A^5XRe1hGM3CHs5LEqB0f|6p4?uKG77*QP_FxY%X`7m2UJ4F0C8^B;Gn(FV(OzCn=&RsMxDaR8Wq@^ z7%kT1VCN=P)cVF+D*ub=ba$%>adiX_GnoCRG2c3bU0Kk4Q(Uz)rLj23R#a9l?xvA2 z6WI1Rn*kDreT~JSE;m1anPontEeE8Vg(kWfVoX@cKbH1oBoJp)FYd;tm*w^3yhvgX zJ*pJDAY#==#4`S(St(Y5B<+UH?r*P1!O#Df{0I_)qpKhDP6&E*A3Qm<2uiXEHMZ8> zpmH>7XFj`>B9dl!_wTaz|8hH817~m66Ie*{V|iKLUx;E%GCDKlrD)vv?oZ@>4$V20 zmH}z4HR}91{TC80A_H!&nYY!}jX0t5?gWtlzoB(&LMVX8O@eFmIvf-g3WGAVv0z5hX( zJ*&Va0ON-FKqqdK6*rCWTcj~c7<{X?WiGv$SZRyQ!PYz}*tpCAz}YyiYjq->KAd4w z?+>q{ycy}X;7NTf%LQ082-2;8PE9g45G3HPv+Ag788|%44A@jV5G}o?zSog}44Q0D(kKKlC_&tdKVU3))qdeP4J)Yu?24&}Jz$!u z_0V?)rxdAa+H@0BoE}Kt=v4bri5U^bNn!+K5vc^b@Kgt!x3*PZv5;T$eYq$6{@t^j z8?@BPUjV!iZqi>Kc?a<`TX6I^U=3@*U%R=8KR1 zBl2lWocG==-O$y}Rqs;;K5(HkXz8@Dk`hz7|f55UH5$J zd%@xRF4-hY%=lC8ZMny-jF&DKW~+MW&XrPYh@lyAtDbSgc5fB1;nP(?&3{A;hRe~h z?u&BTT~%B9D8qUXabSQy@O|>~pxCm{3!1p-4I@*l8)w0xLw&P}3kJ^*LC3mWi32&( z>2n!MO+|cA?(GDNB`=l^)ma?fYbukaVUv|39S=%CiY`W(ZYnYeCrosxORH@z5ueGl${XOr3^=Tng#S>ggs5%<1wH6Z&DOhPM=mAiDnl8 zYRl%+zfOg~CV42axvQ@CCUP3nbivdd1J4sP7$dyW`iq?Ij%|=hE~~~q-Znc?PCe12 z!Uax&%#=h2?8--6G4#P3&XlsnviiniR}OaxE_F>(9@A@w5NRfIb^>`DbG%i+n6MHurqFg;M z>Mx)BAvp9rBH$7zYv{{#g{tp*4Mz%-<{3n_g-NfT0Cw+cKx1xt`-IPUN)(v+>+Ms< zQ+;FH98p`5iwRwCuT*mL0`ro4mCsGK$FML^*GCW2=P&`~Hu|TMt`-_##-k@MQnNBd zgpo9P(dv@Or2QJ{JkDGOX>3KG%>;k2{wd&>_ z#Hp>+QR2eA*A?m5fWjNU?*cI{d(fqd@s?3?vRht=N2)Rl%lZSiEw|i1sBuV{*qq^W z?c-xlDmSr?-^(B~wTQm-mv*_G#efT0e!VI=d0jI)P_hsA&g zsUL zEq4?LzOffuc(HoipG6!YVQQLT$vig3W(4iVC!O?oVNr{@Nwt^td zZ|N@@PRQy9_8%VeJ2Bg!=y(xgvC$oqi8=={Q)`QWC#}J|psqBDdF|)uSp;qo97f@BR~=z&7^H zsB9Y>OyuVSLnzW4HL60Yiz(Z@pJ9w<7-sB{GT)8gJMIT8J+CTGQlI-PLexeuAHom2kOK>=Y@evrIwXJBaLTYx|{U!v0 z+m}E&Va6dU2CQUgWpLGqEhq{`lwB(VSx>Y7#A&Jfl^3L~3txBC<4fpK6(G9t9-HkK7ts--=$e}bod8#|O&hn(=P;AFS2 
zk*2MPWrbZ{A!K$-Lk@&Z%H`IE3CfZ<2UZP($ZHe9WtLHCDPn^R>^O_xJ51p1msyWc zzeNTBn)DkIwE;GaIPd`H75pJl#wacoK+eDvl2b5uR>VxoASi~bzcB~aLbhsV5AntO zTVTaI4u?{MTxH*SF`_4XiE+w;|G{>Fm8{!SOpIzIZFqCV7Z>|k=63Nl8NozeYIsd= zlZxj}z(jPV^`y57SZY?1+%fgCxK}@e!j)YANsE9?Rp2Ld=WbE?sA~vot@1ws|s9_$YGhQ!5Q8-vo7x7jo`dEFPEBZpItK=BTL`2~i_B84kssg_|V-?O87((PHe@Mpjj zzEC!}g9uQINu5d50DTi&(z&-`uH7+JF2xKfZU-&vl6_Fo8!HT(@fTna@Af2py31JX?l89k0d$*~>9pp@mNNsY6VjpML!f4} zLJ7!`4oXA@ud>SqmZTFfNw2)${VkPkzUtV6`895qE)|;;>af~?tP#S*1=Bl51Hyy= zN7jADONIpRu6>Z*K4!07xf9K+^$gL8m%?sZ%U!N&dG77C(i*cspJ$?{%9mdMMOz?> z5QpUcf~x86rU9g$^!W?PcW^dpky@)zVT35dA8{i*L)54*Yw&^~)cD%>PTeVA(S94F z0H@!X0vS=pP9`fB35ANoU)Ry+ucDn_69gcJPHGpnx;Fh$i=TZ!ULCR%%1+m~C}Q1L zY!NwFB0R<5jlvA42sDyvZ~Jt6C7GK77+W}-!JQGIJU?CL`)n>-!_XQD>{;&pI-@GH zK1W?286ki@w22nZ=6Si}z+<7ST7_gWeOvfgUYkAwe2W(`D62pJyG?H{kz9L^PIcRj3m7BJ4VJGs+B3V-NF)jrn@Ub+ zFv!$IUTIYgmy%I>x`IWm56zFV`dK9#>6ab2}C3@)X|a)e-&f{=ztp-FrkP(SgK&OxdiU6 z00UDwmH)whm$9~qHcy1IUcryh@$h!yg2E?SPsYja1taC0N=?aPox*=!lqjsWq5E$w zBX7<3+RfJneERAN@noES@Z~?0?~#|z6lRn3v~W;#xtOW4aH5!o6;^r6g_i_n?%$iM%0o7@g^h^W!%R;S76r+UP34El|kTbQN7y#sIQISG= zqDp#kP62XjP%^5j__G^B?O_yAhsDul#>FnOH;bK#t5n3=cI@_=9U}LX_9V|U|7v_r zl`)dyn~l<|+y??`igA)~Vil>i21$tl^|+ATy|A!A_jJbn!KtQbbRMZ8P=c0WsjZ+DONKc5PD!~j3fbGZPS}x zKrp+WlX{fSaVZKJ9>=mq0xkwnd6>>z*(Yf^)t-u34vl>5V}!5PF1mNG|IMa-yWRMy zt2giEGqIz#LWF=Tm|ufOUrrCa-1xSXYNfUGOm$d3XVCBHbi6u0t6W$LP+wz3Fy*<^)ldxeC{~++%w+i+AzH3UpsP zIK1AD)!5#*BeqDF6u|~o7;QJ@mX&*aJI^fy!&qh17JL4k-%b8&^IWG=o2mDy{Wz55 z{b(On&u$4vfj)hjX}6%R2WtQf2o@WVEdJiCegK(M;$IdQB^K%1MAtUYc>;hws;17X zs$^q1ZGO=z&Wh%z`tI+L;E5fd^}+6GX?vH|6l!|{*qiym670| zZ*)HRyr!<3Qoo{3;D#-St@^ux*y$K;-j@}LJz30)`E|`GnE-m|pSyr?uo=Y*49GRA z_CCir5dbOB7$sD!8u2-nCYdF#-hTFP)sw>~^gp?YrEh(Z+$4rN^)3iN47_{d$#DgO zyPe~5V~;&z6qRxgyyyF*1`%l~3P8sD!(|(nLtip_tKZ#GOY%cB)-?Y%CJEFl z{-mes*Fu*khzf(FM$HKZths$Ln$nPW8=-hPs2H5Ih4|-DfLopl|*%x7OCKkr?s*&v}{&5V#$q zp;Rsi!loipmG5x^0eH8E$|(?R8*pkFycq3mGMz#lm5To+URoXk0YOxG>3H^|Gm4y4 zs)clXGOgqmQbjO!Yct>U~8#9OFoXT_j;v^ 
z@qsaH;r9i^dv44kny8aAM2}!EUTnO3>a~8-%M}s3aDh=9>+?JbOZUFAM!+KRW(1~o*?HjTHstFRH~Uk56fYSIY+MO& z7M>O5EzNtHov6jH@4&_ZqY+{~U_&*Kj#8X=>#yfOW@uq+x}FG6ej@+=Da-~j=~$xz zn;#nuV7{PEp#TQDa1;#U9bARTUAVg{gC;fB-vcW^3YAH`fMd9{;qfV>7w`o@t;^Pc7voUT0tgOGo8x1``4V{UMe@aV_Q zhYkzALPl43eo9@5Juq>H(ffRb>t^!>_}z?}4G$|4lTD8HxpDV=x4H1pDvOm|&m=}6 zgXLUKAavt+<{Db#z#EWhDA~wloTG}LpJZA$);0e*mTlGm%v=a0chjnhvyZ8p7ty;H z97JW@OG6+Vo1-HUD9!D&6z_hv(@U#DagO%UO%VW@K%TBW)8BZ^;pvHr#*^Lh+1% zYsu3Km!Y$}**@5;Qw_Ty1OMr-Uca9OBlMXOh4?l3iT^L#(1VkgPa*GVbDTEHveEO` zwG|7GFUW!H7mPfrBejVdcJL*)u=!jn>P()%C^=#3g{y(bSssgTm$Mqp*D2C*e^Lx_ zx)zQEftDTA<3|E$TAhY0O38aqPkja#-y4oMRMzpS=EoeYYMZ<&iq9J z%q9T7Y8cJ^^^0(-Gfs>0RJf;`4FAk5@fGeG2!~qoME~j9-=?5AKa-;U5T)T3K|+og zZtTntAQ=I0Y(NQ?=eeiCyhM)b^g1eu!7zHbGuHKYgAkyWh^}YQ$u0GETxWJ5zYT+{dsWDmRXv?YVlDlF{D3Bi40l z1i1OvshBnwRO3c3gGl@!kYVJMaF;6DJ|@S7l`aVqO;h5WafFd~O z4LGaSll@oq&e*Am^4${g&@dC;xW<|i0Wd{IujyiliwAna#>^8+ zEBnT75Oesa7JdMK0(fJ`C$9@{Qg21NJR0(=n+2GXXFT3z++M0%Dj3c=PlI7wm^o4zOZnNsrWcu|5dq1(e$MPj(6U)yl$di2|%nAh5Bl z)x@y8!aDO1%D2RQR2)vl9MA)Jk*|ZWk%%P8-be zv%0}H3zJD6b^R6qiQxOae!kO9HKyRWzEb_D8A>+{;xQ5KjR!py@i1*2d>njmRUA+5Q0jEjlT^^09@w zMBPP|JQC=ImDe~l650CRQeY&f6%ND2Yn!8yKC51bArP%rBP!e1pY|LJE!g6~clkD- zBhln1#B;2i@pUZY#kYg~f-trcSsh2hhhF{AT<3N0WpwpW{l@qM!H~$8mtHb{p~_J= z{~URaL!7RC*L4>cq&T)00#6iY^|EaYiOyBdH};Gra=fP(uq!y@Z9arwGr5tiL~5$( zeDWov)Y}I;X2We^F^8KXuCn^lPYYFA?S0>m`9Fc)gHjqCP1s<3*j%ePv6Fl!Amm1K zRwkB{@X|*RVgvN+K1bzX`Ra;czlFpx#HH+4cSF*ea9C7fpVlF~{8TaZDD|u4lY+&j z`piHph^A)Y2Ub8BhQn!jozojcC%yz?g2 zAGpnb%FAxvoSy@S?bY#XcSGa*)0MOPi-)rnMH6wUbvcUeg;**VE-J zxycM|DhU^ZN*|OjX}ah@Tq}bNJWIr8jXnL6`LLt2v8sLdEk8nuW1L{OVyQ!P*@kt9 zNe7G{`Fk=Qt>O?R4{>7IU*b+SreSd1MdwXlMW(C_FlrU~-L$eN!QdSTa&Cr!Nx?@q zY>Yr`Ks0#ahN&ZE?cR;PCSto|>dFm;tr-xXz70}Ag{8BSv=L{j85Xe?KgfOxoREi{ zOhy-#pm}YHmlM@cSGsX)lOy0YuUSNfp5bP#WDv#kGLLtQvF;`NH6+aw!Aim*tWY}6 zYi&5TRgGm^Nq_(~aEiPZ8u9{Nr6u<57m$JFR#Qimf9ZZKLH_WT5{u6Ngu>lslNq(H zW4woq8&o=$LNNXTQe7W*VvJeRbmiq?P!+kq)~j_`#lArn!FTt`g+Su`%~=e_6xGYU 
zGpjy4(kEp2R8WDHN^#a(+89_epz#d4sZD+Os0ENyEtP+eJ^GJ;r(iE-;(IyaFOWkZ z{Y=~niEVOkov*+dTEZIpav%e5nu>urWA5JRFydm+td4~t=PrWplde?YZYzl~rN_X7rB8We|<~e2d1^e+JNMWVlxd z1+CEn6|_yxFAbUz`~pg8f`c8jq7jytJQ2Jk1uzSnJEn>~=+@=#5Bm~K# z(HDAlM`_*yUQXY6+Q;-lB*^3qFBTDqJn6pL4IJa=|_*VY;LaC_kAyzk5V;fzW&hKl|FT?**+xm`Ynj0bkhwPLVzL z3hRkv1r5Qy$SMHr6|20^k0n_8WvQ``QQJX&SW-#IkKi?@=Mg@v0U)8eIyy52rjr9p zT+ov0s42(=LUg6+&OPSHr>k+is48$;`5%JyL~acpe^6!Ir~3t@birQ7b>R<|m|!Rn zf8>IU4){6zDlwWf9hM!`Z60r!+Y>&cif>$BztwZ~ZB|R1Ii5%Qeg))1tbJg-$T27# zkIt_PnfV4HO*X{VV?oRxLDKP-fmC0Yr3Tz>E23(|rPVtx392uwy8@Wkwn5fP-frRq zh;cPGwJSCx5ynWJBi#%`ER;G+#q{e7)Oe}cd1`MeKDwmNGZuVvZF!uL&$xM`?>n}j zB8YUBfP4FTyFoXTq_$3tf^N$Dka^8V~(SZKoAL>hxbWXyA1EAD_O47 zp_Zv|dHmQ#?xKeIZivjQ)PqS=dQag)ejB#A7sCs+?ZsGz7)zq zf5MM~yc}p(Qd^jeL*}XAZ~7=;qs%iUm8Or%AURe_XAi`Gz%liW91{aj`?_$5w}!m; zWs}g$teNW-kW#4qo2Rw(w~YJLspqW%UP@y;W!zzWOU+Yv%Z5iksq;us-vOy`2vTwL zE%7ZniBV%erz&AdcwSGVo)-9i=|=tmwtXE~46#05wpSi+Nhc=*3($9Rgf2lRdgdE^ zHrQyn)M(%ivg^FtUhf!qpxHsgOaN2fTu$f9(&Jq6`yCKbjuUI%%}$N+vnnav2_0t{HiYi{Yl;P7FBn z008f$2eT}>*D5+9V=CM4PLM8h8$HLr2})I#gLv-;!L7D1T5zwa;oeN=D%mssPV}p% zhgeK-sOdmX1zr#)J)hf?RlMeo$ z;%i`4I-W~Bs@J1k5Nn5u7>~Ii;0ZfpykCa>NQcW8>N{{PS*=z^0#d(yYRSHC+_xrF zeAQx>izQj8*F%z2E>2q1NQmF4-q@Pxclz4vzlhKBdF?38muph^L%1iwV<(j&Dzz{>S9*iDdn;%^1XCrybsYo>#4Ca(kr6!f& zs1AeiGjf9Pw*XNo;+a>JPnFsf^MF;kGVCk}(nyPEvEunc4 zE2;qk&Vi&9Y}ojg)F%1zxGI4-w2%_i5{R6H!K!spNeMp zTx!b<9-!AU8A(-hf9BTl+mp`B2YH`u69<72pk!v%$9S(8{TOfkJrL9a`ML;6S4)dc zH=tYlfH*9_sqV^^E8{frOxLP}^SNv*zLTCD$#3Qa&O=WQ^}IV%7qt%Fp{WL^P)`GL zVtM9NL)b=-J8Z$jyx!y8Z#YkI^MHi>8UJ@MxCE$01Jt4pRLmmyxkpUqjDOqfYw%tW z!|uSY0Jf~QFB_anMK&p6Vt1g15!n8davj>(jb_g<)@vX{$?(8)EM3#Dqx8W_WirYY zG3#7U)}5*(0W^sjtCY}*b`n}XqnrXf^pn-GJ8zrLh+HcjN-v4epPhG)Vn@f22=<)x zmU<(GITM5Ga6VzR&QA0C`Ej^e-m#p$n0$z5Ejt^~2p zuC8_6h_r)ZC+206+Sp)q?iq0CLhLaRy?z4>t7l~7s_%>pX7IYm9>bG1Iz{dMn>kVj zr1I*&AOrX;7T(1&YJ zx-`B*f${rtR`B`8CyA5btmU8@pJqZ8);UfL;_aDqHP9i-vns50J{_cw4fpAM2Pyq& zO_M`oXKYH7BA0mhY@A+IUE_vja&{X~7v#{Y%33;;k!$e94oGzszjw0ALX>3YYTJAm 
z&>cd){dkMOvBexl1WQTy0^uuBu8WBWTEW8REi_6ux?$S@D3$l9g&SeGl%WS>KWdp z3oS2{*rQ20D_7`z5`i;YATH~o&Pgn2!i$JDwJ{nPN7lIjNrhAsMydOO`%pRr6WXoyc>rx3 zxY3ton}A(9{Fk6cNJ`G9an7#;pZN&pyPPPG^ub$ zp~@g~qfxbNt=rvYv!f9!3&=)J598H1+WV^-t8Aa;AFSci$FQWAs1E9f&LeD@fHrOO zh|bsAnpHhlD}!g38QnXcKTc;|(In$KAyQ!Seyzli5pa9Ie4esFL;- z0)*VRbc>O?*aEDo`4&$0BQ?%j#i`g-jdkL}vnvI_g^5j|e?|S)xNi*h>pg5}m|IA< zlRTdQrTb!WX==C%5QN_X;T?<>{-r(x?9rh9N-JZzP3g`gBq-l0hh^+|2_-!6KUl`Sc@0G||=>H=rn#KA=*@B*8u%G69R z$uA*?UW6gc3lr1S!hT^9#OnCn8l3{|hnxD#SdMlv+BZ{u-s7z;g*f@- zf^p6;z$hyu{Mx97De1c95k4U{Af00TfZ$q~*ymv`$#k;#TF#EZc{2}+=U|WhF)`ql zz@3^ltXY#c`nrqHn`WT|dK@yK`f04`nU}bC8Ex2l(QcvP2spzV6{6yH@@je6yPo3< zJ7O0Y@^I26uo>W5I=&lMS1{la+*`MAE|EKSVeNjQj->nAln{ zI)+q>4zl7lqawM_B@VXc9)qnP?}u$%O46Apxzoa@*->*E*_>xzDRqfJq&q9=-P!lH z4d5G>gvL9B?8a(hbWopT4XtuL^{)OPm=(a@>be)X1z?gFagLVU3dT)=QVd;%;hb^m zSX@f_utPNG{lXa`QSeP3El?DZc^m?1>Z_c)AB5vKjr}65CPiIKh?xK>X;OoLSvH0* zT^tfFd$68T+my$eNOfN6L}%QQetj{B>OB$aF`RB{`Zcf%(GwXm7{Ahp-MGjare4+9 z37Dc(`2>(16&s!3fZD|ZZK_ILd~C-L3tA#S4iy7-hM>UTEf@&r4E$c5S0Ry<>(>{qIw3R3jQbUY6tOJ!P2|)2I>xH7$j!xN2U0TP1uLL9v1}8^ySvazu^)0x$ zLl$flQ5UlnxC@|op9$KoJl0jlD&h8)dkebuqITmdizgWba{1tNtA0X&BE-pgnC6oF zY|0)Eu%dVIL}1$E^e<*MEngK>^m`IE(WZPYRiNt9-aj{%%eKut{j2a&yI6xyb90_$ zY8vxB`=|(A&^Rb(>`~?6n*>D+Oa2v+O0lF_F9@J4a_g!A*eJ}Vq=5rsETBboSJH_` zu!itlp}9;xzxwgGS86fv&|+s(fPc0+CqN7!vCZ4Qh0_t8oc30$q>`Ath!3S(MkAnb zhaZAw!_AZoQYnmSia>-++s5sLXg2a48gnA-1b~n^Uf*u?a$~q`AkY=1yzY$hwVn|3 z9RMRR%*UJ7Z{^4yir?Zmh~^Uv7Imo$LBRRSYHnj=O2yzPR)j4;5TBJN zO+(s^>Vb)fJzm1_Bb~S2QU|3u^FjmzR4ViAENdypK-d6a;~Tda26x*6L3T??mIe2P zlsljiiDa+mri~+A>smr~rA5btJg&oUJR1j|AW(txSh+@*h5dqwKXdaIBbjClj$1@x5j#phG4xoL??kK`H9C zc{FUdfE22@d>yY>LvIq6c0g`x7F&P7GwN7^H3fX!gl#Uk?SDR`^Zf!FL4pD>M_6K& zr=-Q}P_yxavefHUcXvX(zh42f)`rN=R^vdaG@roC0B@ms?t3-?7;E?tKxu#Ly|W8o zbk2=8gmsvFmNeCQs7uj>8$Aq|IPfHe#R<3WPPM?v6G|N1m=74O3<}o0#NZ^vDafu+ zAI^;tSg3<{YqE=&wr(N(`0mr7Y_SU<=|L5FhP)eX>Hn04v8J!h)qNaHvz#BQ0640B}qp2uQ~HILjDN+;zQ1MLo<6 zB+QgE=0jcrI+yl98+3keA1c`WatFlUFDjyaLKNQiiO-I2t55tb2?WfRJDRgKYk)6$ 
zH{)xN!LPYt1lMmMPIh&;I`7ub0O}OAcHb}>9kk2or zANT4laTk)LZ?roHtSeATk1qHv%dF0Ro(!iJN4x0qy3(n;k>{yMeN69;jo*@nKtHIq&n@Oj!j< z8&t~P&(u0F-2b$cjzrWbtA~XqcRg&%jo8!yC;wMx4`aC<4cw26_Y5qfT#_CU0`>x! zntuHy7?EbOp}CDfv3OhdmRRIFP=uqv*Jp047$@V6U{=;=NmE*~W(l%t5UJg1(Z7IG z4az(ls>=YXHF?+c!WWAPK_KN=++%(2{(kHzHALwya8*mOaut(HP^yMN*3?Sq17407 z$Y)SgTRw3Y#3>=Zyj_~ysJ1*Tq$ByK^`BYsikM+iWI8dEiYnH>%&ig3v1%6|KL(<2 zts~-aq^+K}!dGBczB4Al)=fg3{sg3BKl3roa)u7P`Qfafjo4p4%S^$4K`G4}QJ+|# zTeY1W9Hj_h&CLZkQQ6gx z@t&3maeoQBsk-eXZPg5{YjgRMlh~8^u`|wV`%E=GH!2vU5}y-b(G#YfNR}4Wk1Y>!5DQ#? z*?<>J5hZX&sz$#|WUsQ;0x^JB$jxQbs=bcVv31ia=2>SPub$pmU8vV*KNP1$pP>(E z*I3x>0FjXboS)9jQV-PtOZj9T7Yn?I@KKU$sB-i}fCc+HLdaEl= zm(iV`Ze@uoCG3P?nk(lXs$br+c=nq7qr2Z`3ZgI%JQw2!S8y0iqY+q;w#3?v9K~Jw z@<6`MFv2k3?QhDD7!)9%km@u;Qyb1|T|uo@E9>!oGosh&u<*sEpr{wup}I+<$dET& zxhOWcJyw^5KGVh|P8ULT{dC^X97&{wEpyF@0um2Z%nd~E*)!3@P1K*wqMnh5cn4$y zV`GMa-c~REs@no)o2caT!B(tQMxmngunN6DcQ~hv+OIsupOkXn z0piEv2qZGo&f%2vnmi)O>+;CTQ;lvQ6TRhN9N4U3oy? zfw~l5f7G?Cxi27^O}e-_tf*||+RN)87zN7aHZ=9A>twfTLc|>KTJK^ja4Xn;yz7sU z*aLM1IzRSm=wJ z#u>YJ>Yu{=x&!jOJ`Y%ngYiUyZ}57%V&3hz<$cs_1j!!i*)j{5_Q&tjm%+ro0de_P*w>Z?o^$4V4S0!L1`F<&s! 
z7+BVqAjRS=0Mv=$sLIZf9_kSsk?~QI9_1er*?|PH7Gb@-73$~~Um)>Je4@@(!EAz} zT1ebh#-AGSzD&WHVI6gxnH-!FP6=x$aE)TAryZw#BC!aK;~Uo&<5xC4LyTEvDJmj> zpegJRN0VS$zk_IzQdwtPQ45zpyUCv=><8+0xkT z+SH09zsmzb<{ha!pp_B+$$w~K$x~X+Z->5 z$`6>xX}X$TIs+*kJkb~f%w+rM01#;ulz*^E-+|q0F>n=bbwx&)W>Ll!kvHq$DDfa700zwq0|nn zyriaB>An#g5c3@adzsCFGcQ|}D8GL|FR^IMLe_@cdMZe~D2o)%&!i$v(t8f$wKR{rIZbfN81alJ zz+#=FAH(r4H9oCUsSdvo;^9d9dPEl)rb2J>SUU5d;Pmmm;8xTwCJ0<0glpEpFQ_2!Ckij#5!LN7dkartd)D6J8@Odow-MN@B0<&VkTrtOT4^(b;(f zizJlbctNbJ=bP4)ZZEs*EVhrpmWs7H%DHiGk`ga@k>Sa4RXdTNsCwPEJ-3!MKP>mmL4|ovSFNk@7Xq?w+DX0@Jy4cDA#br%MG|rqeXoFN& z6B)yCVrA`mxjH3v0W*h3v}j9>6vm8ml9W#CZU1LI;@%^*Si8lBvRg}mvPXv7?$;+W zUXB_fs`zB+XaF0|fzv;XE*2(BC7ib!4ed{mwf+z`jazG)5{c$vRb(4hjn&7dUV;P2 zL~tsdY0;O!cBiu}OvgU``TGT{mG1f;t+Ul}N5bT(_v;UmQs|4biIGvtC~ALA#r-rN6|V&vj2~o5+^aY4fzRwyvdJJ#Bj^sm=V5PKT#-QkyP_k2sD^ z-kP*)1yPXENwx_pdModC3>J>;$|0|Vhd5S>Exzy1Y2*o_7Af4daF8w-&L2JNU zn4><-H~YK`LopXn(fl^lTBOc&IC8}*wQ=s1{%T?eF5<=Dv9M--$Qt=C&BT80uyF>H^5a6b$=C`!{`x7Fw3lj}d6Yf2$8*fC3Q))q0}`8CcTpi#dP*Nb@ftMv0@Fn^!L z$X<}f$BIvm=wtg5%|1R~OEk^!8~%>`6e5rm^)Y9JmcI8cEJRt+s01hX1K zz^u)pjZdzp^juv%CgB)9-frM-Fz-jy5b%zpu3Qo_0IVtQ7+89} zy0rCf0^p2~@4x{+>xKUl*g^`Th$^-3{@4@OUA{Z9xcV)9&%no+aV>sf9CW!e$JzUk zmLor}X+qCc1%j`$Ia<3yEi!Eq?Jp4^5IFiI|i1MTdlc0 z2g2s-82L$~0!Krw#`zpd5i;GXgeE^CF zs_>pg0p6j4d7PPhON6CNCw+WHBtDPiOZzrlVUKDxEc{Hpg=9B)F~d5`R+QR7$_8oi zg_1;x9`nV1^9Bxv#2ZA~w-d3x#5Hk1v_sJK&Xp3>>JhmR&Uh9b$!>EmKD`Ixeae{N ze4cuodO*ZQC=IK%|F$SGFTxvMKf8Wl0a}(1G8lyMovpqO+%dzHSC8L~#DeGAa0jgd+9=XXM!RfBi&W}(LQ-4p*3Mzk?wG7Wd~UkXp!m4Z3Ytg zWW1B%8VptBh-hh7{Q0R{&mfo=3tGtjeaMfiuhqn6n+sq8Gs1)mK?h0?WP;=+hQL)W z)ya2gGpQfjlI1~vQ{kF_-b^jF@fpzxs)X^dJ&Y7s#cy%DF>Z zgC-yT8&`Y(CrCF`vj&_xA*>p_N7k$eKEeqISrEY6@Mh{32m$_ZBTZV6JoV2K7Yvk6 zzZF}kbPkx*r`SX^3N=mX$-lMp`7LjIfvoHj%F4FdjZHLgM0ACJo%@GJDtt(8t;9vV zMpxm4u;HB)N6I73--NLur{RC{mEeU^HE1QN5%E@tHCUfBCb%;ZvRYe~n^WZf{RW`2 z&lmfDeT3kk5aWvP4JcTz&XaWSN`zKqaP~#lh=j+^`V3hK^ht` zlsu$I0kWb9V4{2n@ER!iPbie|f1f_2dn@ZqZOXeXMj{c%#$8KdE66p+ix~x8(#f) 
zx1EY64*K~$Qe($5#;N;pAQ%!u`&wYelS9?9aIP-mN&Z=lh#CZ=Ue}>-%pWKlu50-o>-Ve%FmvRx9&<_VByz z?-4ol=JEx*ue+2}%PV28E~k9+pTlNfDwytc(Ygl>cpsfCF*tOd1beDba>(Q6(XQMn zmJVGzVmdvkX=Uxy(uDO!)Bln@KNI0n_^10X3B(cw+68dTZ@yH;ilQ*xvGb@g`gBq) zd~T5jfvC5<iW3#|b2VOpY&3jm6A@9w2bPNK4 zun6-*7^A%hm;8svZup4oZ||)_d3gyQu3VoiD;BRbcPUnwWsZ(sE_zy7;Wg3FR-Ga$ z?w)X2N^EWg;*?Stip&LiyOmz306iBME9pJ{J$+vot&x|Ll9Cd5#lvYgV^Mw}b#lbu zq_R<<$u9wrcl4gq%YXc@KP2p9>}5fQS#}yJ7PO3$cDGYA!z~soKrz}OkmVIk=1XZEmx#u zemkkRGL`i3RUCU=H`LpY84+CiK!Kva&?2p?b8yvMfwHfWAMzn*OjIZ3giWCTf{lcE zEySZEnuGc2Ve=cwV~;8pUTAo&$?`5_>pMTO0a!V9jUG_N5u|N^a8OVhAAtG)EadN` zhymOZ1<|`8yrR0RT2b;)z_v4d5s`3C=+6B89jCtI1xMwLj?*P%zfg2I3N!NK8KSeE zobMk2;9S+}a6+21;SFsdqg|uUdM*mvm zmTc7RB-=HG^-#2xk({J+$%9Erqm1Mc3+FDj!Zp|1nkvzzXpxDRJ9^(dId`|NAGZ9Q z0Gd!_47%H|K8cJ8Tr7VQ1PN*dGDVXr2yec{!bd88Xg8nRiZ7&L{xw777vRL_w{KD= zhxQ)WeESnqTcce)Sw=?xe2^{L{ODlXBq6R}G)Bw;pl7_9vSA%8wBO#hM2}Ou5_+s% zT&}*#d9k_Cip@AP^{2&JvAzGVi`Cp=40EU<=9jAUwUo8%L5VszB_rP4eY=BlGrF%Y z`Y=1K3KnM{^$(H)7V}dQQxbXWT?-;{>?0yhw{h?h3m@l5NNhTO2+V$6fsp*uC;n#Z z376RAd7YEhfU2^yqFPmi~=jCj94(st^rkDkcf6Emfr z+VpfND04+ee1uanOnRWOxH%*bT`5tX!#(_QlB=?jyT~W3(3W3QVchO=8h5C^5026H zT`JS{jp2mQOwd}WB_GOgrF#&y{BZ}8Tju8w$S|&n*@)xEtV%3yQZ$lKVvr!gjs>Ta z1C~>^R8pkSf->3ludb_dH+w!@GDe|?PgUm3wn|k{%-`I9THiWim%RBqTG)F?c z3hno0J4BnJ`mKDRT<8>51k!mx$xx4JT3#U(1cm><{hjZ}OyUE&ndZyZ=w<~f=G<)= z%;dS}bVR@PN;bOkC^!h;uq|CAeXcUGv zOs-5)t2u1odfNS%jdh%TZ_naUa4C6@#&pK1)s?|i7mI{JgaYxj`9W^{bd`~QW)6~& zm|?U7;zqbH-RE}_>;osimA;m(UQQOq3Y}h^KDeEavc>s=Pli(f`kGPV%Zt}3SIsU2 zQa`NQp`(6}AB4TU*h7ID>Fa(a>7IdGb|IQSKNK{jAc_W;zg&>4^MhIzKlv4`LeC(yMx36Ujg!!$QFZ+nyQoTx zq&+=uKy!zkX&rfQ&>cp#UgTj$JH1<~YH#X(r>NSTqNR#Z$FcyS#j`IB`WwEdV@1BeL)mns;o{ON@e z=*5eoNA`T3>NdllV12UCod1tbZLWN{)ZqiJKL+>e$-hl=IK3I*oz zqaZGx&G$g_p{?_GZ_~Ao5%XK;%#VqB>G4UolXhuLA5_%7=i(R>8q|N^F*^r&!_Rg8 zeO9U>F+996>F6xB?4(Kh+H`fZm8?<(=O=FCIrklpzv;l)4__7-|17og$E6P@|5Ql& ze|M3bTzzzX^Nf4ah4OHjAUpKPDLq|yJLQQ%x7WiR;3^w%`QE+Bn`+Zp$&)_M=c3QF zH}+aC!!*rnhj`O}BlBDG<)%`$j11>9uT{b7B|xids@{pZ^;4g{0sT&cNdl!Ynt 
z#zGFdpefugC}jj5d=T<=z^@OzfPoY{h#iXF-dy$g-yR-$eAZ~;RGPwi+jwqDi2@3p z^G7zut5e@`*#h^D4leJL)aP3!3_2LqC2xA6*>A0bcptoAfW32xY}7|BQvAr z#(0O3l}8E`^K!LzluOjDUrqk#aQtWpySBIog)&CfnJMdZTlK3qQxXztb$BrKzL>!qIXP7NJC8Xm{Sq4|0s?vIm z)DX`7p5@6T4mRF#q&H>K;rZO?y~%E=xl;x{gZ0JZDTcmYl-G;bAk$f%(p1FoYoMAm z3)lDv>;XxkS!41k6Y;;jROPVx!<&@a^Cs-){F0;8om1Bo%nxoVw08q7Wd__Prc}`4 ztBaBs{m{;6Z%Nk@X!}vgpCsy(+HXtE@fySwt~tpB87kPQB@)~A#QbEWnWPpLjArl4 zCMh{fuI~QPH^k`(A z+;z7j?>T3)1R9A6l^qz!Qh;5X>^tYPVk@eMrk7=k2hZ!2mNk;Rki8CU+KBr7Dzu9s zJAYjgKbg}7V=f$D@D_Rm302IZ9Zn|;L$fQM{=eDk!%A)F$HJ)&qePSPAKMKy+!nho zdQfsTrsY&bJi9LrzP_4hwmdk#5~i?@QFEhVaso3so}IFZSqkDw&ddcP{WKR>bd%aN zTrRMws5JvzP8qKN>l|o-nwu|=?B$LilU>Y8-;I8{ zA(98KHDk!mc$^twNo{ZrupvLQ-e-B9yDgl~%`kvaTM_%Awcv$-% z3ZiXkcEK;#yQRFEerQhz7fhGkjU2l-Nx9u0>4UCsOM`la`x>K}QiSZiTzj8G`mNwz zT0_^uZ)UGcRV1YXbMYhbOgnOKcJ`y5yO9~_P0C(w#8_6&6L71uXZ%QElac{@kNPX8 zquQKFy0?<78?8H0b{HdH?}zY0>v32GgI(TgTO2Xjucv98ZF)8kas`QufjR7%{w=cw^CyyA#gvA4mY?J9?ho(@& zxeCA+8URZ*-3Y!9)ol%z4b+FXzj>uc#BFlWQBH5$n}gl_KW?gOF+^jveJ9VehW7yN zeNkS-DAXOOB|Tr4``-(CLVoZGF#K=7IyEEF_PX7Jy{9VqXT1`L`;1BeyE*%1p=J~- zs$Rv8rAaPClr50JFpQu?0jKcmfE3)9?bhk}&EwdEs6c{1tE z3VC`qcP(3rYevf6wJwfTew?8i%7rsmva2eB0^Ip$mUoivRReRkaDy7qosUKh|!iZuuc7I$^L)>*=VKvu_BRE}o6Tx6U6iI_OgP<*)*OU|QD)nKN95yfT7nCkAx&CQSd6Bp!sd?#)v zD9eYlvthbMjw`njqg;0O24?s?rty6T%|7%4x%oD=jlk6BG^n%RcIueQR?NKQQSZVR z{kqYHce;1}JQ||lAsOHkDR49yz@hDav)5{;_`z*#$>3j>3*n=lZf{3_@AB_$`8JMQ-#AV?emKmy&=)? 
zfoP|xU*`dK%^?yp#>Yi9^)7v=@NOv=ac+p=F}U*DT&8Xx!8-KCo9MZk%@0bsUEI|+ zcGc+G4c&t!v-TD2OiJ|dsNo5Q-tYL3IiKDvpM7CIbp>1}bB`94Y~HJ`bMzgLOH#I- z+J*CDi>Hs;q`8EBLc4zbn_FwLzu!KnOXb0ZJcLB1b2+%Qt|D2+8XTt2SJLVD?&={K znavv^bA62^Gl!GsUn(S2EcJUY3{@GSQJfkSXXefUC9#7t?&ZoqJ-p!O2wppPPBh&_ zL`1|G$UZSSy^m$!W!8aAcXTGWqYvKM%C`0MTjbqUX{wzOUi6?U!BSXn4Cv;$qE(K1 z6sz@L6G*!XbE9&d8?#bgGxGYYOZ!jqXO2-!(0-%KEJD-t8|09sk1wr>yAKsIXlUl` zh3V%rJ7V}GTiXmJIsxj}X6>~&L=1972cKlhRJ@}rW%GxyqzFDO3!{Ln=LKm!x;CA$ z*5jrHt;?$_jvo8jF47KV5j646dd@7xYmrpO%0zPCl`FLI-gpo`dK2xjZ5=yRp@J1T z{lv$rMEMf}d*|Q4aayFWyuKcsZ#i_zNKw8o?D`47r<+Mo5@t_uy*fWOSaiT*W+vQ^wo8v~*6H##CQvc-s}+9y1X%cn0UP!<;F-~}`HkWl(o)GT**zO= zU)QadDX*XvR+0^+H8r%n=e_i!sTL82-2lf(^{r)2A215s{Hu+rz4EAl`jg7e*uQMy zQ?^wJ^T2gU_B_{UoIHZq3ha|dp8QQ3=l?~A@z)V9S0^fLrt0QOIA`-N4>p7@4AmUS zbS-y?T&N-_J-i@a2kzM*oYd)Xu?5q7H(4(XumR~(aEcN9!t$Ka@=$-%+CsXkw6x#_ ziG5;XF~5T^WFH4FYzX$3#$U)=F7N9DI~~Htu2pH?d~~U=*pAH$Bu0(zlCb|Eea=07 z=N|NCXpc}f#~Ssfx^$eGcFa`p=tLd0(H?r;Li+eDVTE49UL32cU;p!fG3PvHxcr)N z#{_+kLVnc*UnPOFtaJhMHlp3GWvC!usHK*k_z>NXD(YDEr~6Zc-tR^7-L87o8r`ljXXQAF6hJ~^|h#}ItsGP zc`k7p#DQOpmATQYs#a^NYFH^9Q_b#ra}QietGDlVOtdc#)|1evjUM`&k++r+LpENv zz-Omv-CM9r%`Yg>x|?#sd)|70M_Xgmu5ainak&pKJ6k+|7w#wyY5fH(Y*h&#Tww7~ z^tXb(ke~i7!lG)h%98udJPh&wvGtt+O=Vl#@p{#nu`rIk0M2*-Q4s_IsnKx|4^>1! 
zYEYVhfOJAv#|8>|La$2iC_QuwV5kBDh8BcCsEGz4h7!Jgl5pp~@B8f^qL}Qn&n~Mx z>sf1ep1vsS@&*A$0IwB)SYgj??C>2XRyD~i&k38kY9szXIzTDyUUmRY$bBWpwdNu% zBMgGGuWAf{8**d>L-#&eVn2QQ3jlLHdgQVS%bugC_m%vt%RJd4HNGx6rA#A{AGSWh z{Tkx?K%&gIX_X4esxnnGk0*Mvcyfw?y$l?vX=m!@g#frR1a(B!`Kt8K|Eo%a?Wql!Wf=q1^cuHMi>Az@O^>{CM$Goma02e$_bI`qku-qy7tY zm-2wG({-jjMpL+eKE~?EiE+2=$xE0|9bG%a)txQD%9=Xq7EM3>-~YkR!RTSa^_5OfL*5^ z=E#Bmgg|QT=U%=C(b{3*gZHpFosyq zJQ&hVUAF2Kl+mdUtD}T*ljZ$=>kfHA9fvcj-Zh z-^GK(JLoQyEqe7T0>Ho$n|OifiJvI3=!h}O^a#ii@voUY!u==sV;kOjHSGJsEeXZO zl6iuukc8sfk`liG&g^hm-Q@o6E){J5HIKjT(@RV9Cx3#$Gw45tFQ_%ecvY4$Is_^6 z76lVY!r;Q@zwPZi+MOBc7g|kUWP$4{51{)-DjHmFV7!+<<-3pbr#xnFpgbub;7Zm8 zZ@A&7^}zt;Vq;K%s+5TkL4yK=6LLHOb7p;_Gw$DmO&*5#iLz&IjIDp`8I;@Wkx^P& z3dZ|ZH|WRbU9={wlUiKdp1da9VfM&h-A!{?Vm5e1`%0;n^>J5&ilz}>Qh54~l6+>DQiAK_6eo z;)Mf1x1CIOV8`Ln;RS&`f*UNanEAYAZ^5#2g##xob*DIPY^i}An3%^orEc^rzYx+D z*ib^%HwZ+rru+`F&a}DsmWaVo-Ua=X;bFO#qe+7Ci*law3unAaPR$w-n%nliNMA)Zh`sv2S#dY=d8G%Ct_^ncTv)ANaJf`8tEtlstnwg zQlNj(XGQENuCP;|S)5n^2OT0!?-66nO2~in|c9RzH=@?vw!59yAvhlTo@aaU~(oqU5;5QwcS7whMiTbGf^eHC%FY?F~nkVX?f-xho*$ zeRUwl!cheyWpuTQ!RG+3N7kg|tIv|E2-PP^oV&&76Zx&a{ek&k1 z*y&I13930s%JH`N>c@&~Nl!Vt;j-{i`3#0`Q6Y+aZ{mB9pUjE85N~~ipE+`ZU=~9S zXt?-aLO*zFj58-d3t5<9Ci*!a1Be=gtvbC*x9(p6>P#zA1>StojL)3-&p4OCg~7{& z?;=yDxLL{T;1B z(Q%vl+BC3rJ>?cdpaWYEpzYt(-9h5@K0%xQ&%^*taBF>0iWT=!U+1R|e?xQPr21x? 
zgCuAE&omBR`%lz72H5{1>24RAx7Qj>xQ@GB*t}&+Xc%__>O1VHjo*PT4kp@CSHssg zXk;iiypGWdNkt++X0)we66uM~+A33qW9T?&98kUXcBJqtd#hhddyd8##&$1rH2gBMGy zx~0ny`iWq;lRvGNZ&jaDx58e#*He$U9oQl`|LLtvW@hb?dum$_djtUBs{_vhz0s4qTa#z${S*0isFZ?n z2xDIYdQR6{M*1SKE&KfE`*&@YiIEIinpVNF2W9V#v8$bO%jFqoU5#3r)zl+SDXn@h zH^t>|yIK^4^Sq4~pvck!Tnt0Qgk#Wb(3U3B(ev}1|uLYC@z{ZH>zgPiptAiM{ z06ZK=(uMh+mfyPC@7=p6 zE@4_rt-o3WLYju9+ow>##l4NrBdUOxUZVj1cYp-NtRkG@|x8*u(3GyP0Qk2T$cIkwm!6gZ1qYOaj5!<*#vd?Cv_H zOzF$wtF(}#1wM=Y^Ik^rre*&26pH_7N1@+DaT0Ncs^90KYmmgLd*4TsA{yjfp=5c)GKm!P9?-Noj`6%AyjHej`=GsF*}F4Qgb zH)Sa55??A`I~9#|eLpsVCui>yJWHFDVF*q-niih?$+CSvc!be|W^hs_kRf<8;qX%Q zm-f57R2z5xp*p4`PD+O@o>L7DEdA@x|9Wx^DNr^5uYsiDzrQS%ti9dTzfYBQN2XoD zYFVT(kldL4i~E|2K>PA+oj)gLOy{gECEev<4JPYl_Dkm0I*)HwZ4%nuDMzlK*ECH} zO;0UX&|KV;xGA{ufbpP=L$)Bumy>&D?Sh1XcxSaP*BXyG!+ZS~XQRP=^IzuWMc9Ac z`ETpeQu=V>n=fsiTL=+30-mR)C)ZYbl8LUz?;ZXe^veHDw`3r`<4saycf0@eghPXh ziz-WA*#DcjW7U9(Mw51QR}y4gH(r}@q8C@}vG%4`e{DrX_!`bb2s3?y`rv!jFDSxV zZO&H_pHSEwv}5g95q9F2`Fm=c|4Y6HA8fx=BuDCkf|MHvx5G>Nrvi0D@pJi+cFOZp zhg_;T6IMbMv^Ndr=6(nFotmA*dQ4O_Gzp!x{T5Qd@mSa!SsXVYU*N*|;4O)ZvSue+ z_>-61@DZCK0CTkO47c-l)g}H;WIhN|@+$eG&SZy&@U6jDGqq|kGf{XU(j+cU>}nBf z@?$%(QfA#X8!-Ut`zMt=#KR20j4(s9HO0Mb9KWaTpyuv+3Y z9nTlSgZ_?9VQorrl7Vcs0rhzNjM6Z3%&2w|*#boH3Hi2UimDWo%~1Wms#dP-)_q!Y z@5!4%bFq>XZ0X|-mRH9i!J`FjKSq-@L=;#hp>mO^J;o|X)b^9x9)VD22}$L|ngCA6 ziZN&YhaSTzRPMWKwdJ?b_M~~8D$AC?dNmLy0im0DSC6M>=BZjWz0Nff?hW{hXm(Jm z_ICeY#b9sAFjc{!(h8%ADAY{;+G?My(`DfC=)mj(tQv9HS>+UXyN73=y)3*}slZ=v zwW>;<`NMs6;2Yw_#HO^|y7$XaIzB$wbMM`}_Y-NO*uk4U=C_v`_T|^ji?#G6c&sdj z;sQGjQrl{%NT+51LTwhtl`>I@+h4@i+zbT!FM4r96bWU$n*t=XomTVPwl3|JG?O#=`AbFVnYNk2tD7~q_AB~{ zS*c(~em=^$Csb#Y)3W9sY&;Ktiy``2!H#+%;_eV{UzpWNEhxh zTCXX>9n?g~187a2R7~(!BAGVDtokEc2!M;=J)Qb=PQ@jNk&R=o>=7#~Df>=Y`nuid zi5x4Zuja;lt_3TkZJ~8DMnB8MRkw8e4Dg5V0?93ui&Tqf z{I^Aum3#-Hi5e5A=bn-aw+nZ_ooFg6aT=%$2t)?m#=;eo@En=N-HF*hiJdeaT5Imc z6*&g!Qo>S-F_-tDQZ(s|38>6=I$6`ruk-g98IBzy6T0JcXn^vC<$lbr`7cm3q;Rx! 
zO=bK3WDvEkBdnMproRBQx@Scbu?w%x;z6q!?aL1Y){dd&g!rxAIwqRBPp0I~`*Int zFYVq04>6)DVBU>HWXAtnGN?_`xq{s&T;Myb)W8&47L=*DnWQ+@j()wm(n^Nsw6qeL^<1XMWU*Op*M}0%v zH`Yg>4$W7H2Y{R#;u{6;yaBC4!_B4h{QAUzpE2L|LJC-tIV=|@T`Wc#f^~G=JQCfs zLka`FHK!JE`pf$ulB^M~ikh;oa7fR|x#Z8}Y-VFIZf){VJ7;`#bxN_cK+^18G`*r| z4n^;$q0C!s!b4S4M)E$)bU|t}xTXkj)nbIa0DIxpdYJhopZ6Pcryub@N~#lO)1caA zZfPdo;DR5Y6(bcuMk2(t##tB0IFfo_Kf(S=&2(wBG6FRT1+})HDjiKy z;nqY^nay`|I=}AU-j%dQb#!nt&Knv2GG9zch@!TSj{eu%OSQ?0W^WVj8B|#@yMWk< zus`p298x59isng>G!e4|YCgOQ;!#;Y&L@&74v_wW{$f)CXX*I2xmE_BAOICCz zuZV0cuN`aN4uU_H?WpPbHO+x_zK0&oeUND0`m4SSR3b$q$`b$c%+M9!0`0CyYToII zMKN|UR2NCzs4whzWz_&x|EpCFQfFf>UUdG8lAKnBdcfW54hEm7R{sSs z5&<>xq$1bB;=N@3TE}qWL)fs7_OGBSyGpwm^8R0cwQ-jU`6cDiPrv$+Mp^{m+5r6a4VI2w7@GmDg8&R*ab^x5NGSDv@eQ zk_9|$;mWUC!~K0@(msgYx5Rk^2Vy@AM(ys)2v^^V_+uMn7h8~(6zfH-De@DhKbqUM zX}q;=EIy1 zT<#knXOK>MzMsnm!uP8Z{(#NwbgZ22) zT+U!!+Kh$pP;MiS7e)0l|3C0S*VgurFta zG_tg`Bq~OlXp46-t+;(Xe_HwIt&5;jhjKifM@-H3|~VZ7dp!^23LJSky! zgYr|eNa`~vD6b<-Jsd9jgOqVzs7{GQyRuTAG>~(7cZSw(h9VgOsGz~s9==GW%&1%+@8Q8GMQL1eK>U_05%k38}^*P_~rmQ zZcR6B@b}O9N{WiZ$<#!HEeGIIll-HAyis(ETog2DpPAV2^fEO_*X<%gF%Y!=drm*M zM^%4KM&a}5^RX-WhJI@5>P;M2RnIf(dD)GI5EDe$Fp(>^4^F~GAcr3N$wf>IZyPex zd}vDBjs4SvL%Nt4+ZMS%s6I{-(6GgRM*iC4%h<`NT#(f@ePye`T4Wp8O)-k93X_;M zCpzM4uD6tlQ~bv5qq)odb8x4O0_pn(R_VYp8G{Lr;Ct30Jgsrgigr~n*e#b)cU054 zmbr7Svb1Gy_@8JsTg?6g>Vlr1p2E&~icOcQK!6ZO?(6ej(^D7h@!GTl>;XVIP*WGe zQw!0>J_WMsO5tHv0!=w{D6oT*SwZ`ImktoH8+#fL7=ur*{F!f9RWelDcdBwSB@=9= z4y{<}@!1OkDkb@QGb=$(-Xddi^7HtIqv5x5DaDns23$@_@CMaHD&tDTqH?-5y$@DR zqK-nL7qNySfO7g|H&!0^j5(TA#>z@EFKeHPCBBw-^v!MWqBxzP$N4YH(%Q78HTk-35wpei_XiJa7)FtV{>BIGtQ!}{>BIK){Vc+tiv~m^KHSlGJ zXvi<~+MuU1VsNi@D2lpEqWMf*^WGx!Bx(Y52=TA8dEA)gUVvrk(J0J;0AX3j_E~|$ zo^RuF<}p&gi_`DCE|h7g6Z5h5B@CK-{$x3L&`0%CY?>IU@)w?;>cp?-v8$~B=E#yx z5DrIBA2^JCy>!^?eJ6V?GCJ(@w;e%?okzV+_P=d~ScZZ(4$r{S;-tGd_e$>AFSI2$ zeVLh_nThxN>MRMe6f}@II4Y`DS4UbF06lFBroSWtZVhrPSNhVS&qqn}h=1+$5hu$& zs7VyB+jG9ZUZ>B@bu6t$F{~nU`pDSe%I>Yhm?t@HhYaoJnnetn_J0~QXJxNP0C#mz 
zMDbpT4a^~0(^G&s>E?>i_fm7-4({&jK6#1R08x&|2H-V+d%7`{UU`egz%3A(=DK8D zs=$6d-j!9+boaZImq*6gat;ohx=dAs+HLR<)0loy?LqOV^3UA;dYvcCVs3_AVpcA3 zq6yV6-kwzcQ+Q=Ev&d~~2vq9r&{nszo5x8#GQr_?oc)@uu;7OkeB)7aXFHsv@A=4e z_HQU$gS}<=K-?HoeT+1ayQ6FKj!VpJXojFHK?00cGwHRi-J617W*ih9-O7Q~?zC-! zb;M%_RMZZ9tb|S=pC0_?zb!738UiscNqZSR78cflfiT#L_5={WB?z0Pcl(QxHF{6}P(_BLCL@Cc7dekj7nsn>1lKMbBTr3x zy`83p&fr_hwiEjDa0;{1O%gTnVyuM)&s&4lWsd6M^4Y2G)q(AnDVO_Y{F8Gi#~B-h zvfqG^n3E@$2mPXk7jv*oVY?#5$ZQlRqK^6!bW~_J)4Z5sVB4iSr zlmN7ujdmFRM#zm^6Qlqd*b+M+qbA-y4Xs5|bw!%;-X`o`+ukQeEbc!xHoOFtixG3s zEop=Jjh2Gm(|;&^bW~7QJ+r;!eT03nw2eusCqLiv#at`wb}htlx8#Y~s>arOb_yQjDI z)FGWDKYCrc-T0D0ckhDsfZSA0+dGS(8BS?=`F@rd;)Nha^nw1vv#c7Z z4whCLw^7z7m?1ax9x25gcGp?;GT$sY2O|Ll|5vp^-p!$Y93{1%zifhs&7{kXkQzcB ztoK(*K5WR_LBKG5eZv978MM$%*cZ_nC{1c2oIJF8EfTo!H<3*+Cx-eRA_Mb2gUjEEM^A zNmfxfj?6@3QkZl6B-Hx9N(!bya3H26qgJ+9mGtF;@`N{HmUN@@Z|UOYxk6{@DkDS% z8T@EXFNJjq7>q<}!TAYW5w^)HzC)XtE84_O7(~0=UqFX|ud$8!V*)Y~Q-Da@2TtmX~oIKr;&0BS8n5|6n$mG<(mVeLO0dBGqhW% zq$rmY#WpQbw}vW9r}x+Y=$rJ<>csCtpHctHhlZ-BV6W~->JVHTX>HH|5V=5ON}Pfi zNk{w_|G0qq!0X7+m6IBJ89@gw8~@(f4qF&ea0pOW3<&Rw0FhE`?VJpCWMLWOu%R4@ zo0N-ouC*;(8!Xp7fTFD^Y~q*-^p6D_ z48?jI^cZ9IdxrQ~9%fIfW8>_w%VZ!;x@+eX8B~xGuhVln_!}BC0PW*%JVAL=%nwqK zC+QQE#Yu;9p8}`iQC{=MkS#c+_hX4kDT8)i&Dau!qofH)U*F;@VYl$DN%-~ti?K(+ zCZ=#0nzm~vNEr}KgzZJ36^I!3QHkq7VDM{4hr@J$_L0<> zP>rQ_!v#)iS&ymp>cSP%pT%lN=;n59iRPYBoL(pLXX34)5c=Fe)S|KtYqifPaZ9kC zZ1I}_FzDdNd}8fCuh6nIS0_c%fej5KHep^k^nEI~Ezs6`F9Wm~fxMM4_>4L%>b|;9 z@|wAHUnI2w6oCM%KTm{Q;I8;ICAG)8N2|o}Hmi6^4L5Z{9y&c}tN*GZN}9P6@omK& zY1tf$PP}-*n)r$A*h>Z4%zZHD8MBXC+50}$heNO5#Htysw~}YswsBBXY;Yz>W^N-^ z3Eu%NORRoQTqBdodY%yGqHgM@Nz}IA#pias;2U8db;?BW`bfDLpqLu;0N`i8k(|Kw zi58fC`0Zf$mE9|?I(XOiCrjTZAmeUoj6l=|rw=3PDlqeJ2$#dCt0c`QM_OATp#=X5 zYv(>lX6ArV8vYHJBZR(-)|o0%_SJwj)`98@G1x+N$+aRFcD(mhw!w zb4*F#Ku$pz9Ir&vN}O7f!sMW#tC5cX&rv;B#lC>Z8-cj9<=v_}a9R~fs^GLL0HQB> z*xK2vZq?T2+(S{aN*ngmQZFRd!(!#pZUWOqU4O+pF_{|j&shzAT_8guAj(Q47Lq4y zO`l$}DuudDcR88GJhx^*eUYvHCrA=-;Nf(Y$RAX8@@i=Jsb%{4s_(4>oTgnJhi1V* 
z_iJwq^;?quf!~Lz|EqIS2KV$A0q#D0pvK$o0IwBF(4)k$wGlC~SORrPv{*j<2c}6- zdtxc*g_S315Li&JYuCcXCFu$>q;6y|hAN^R#{RK1KOASR|Ien7LWU!#@e8Z*gaqsd zGZ*CE6|>OdnoY#UdxdE}t8|#TPsBaCR~}6}Vt|WvPNXEu@yVT{4h{20GMYy?I8Nk1N}0#L z+8YkK<3j|(279?{EGyzZ1P}Lvk%DVIph7L+gpDDez)=w)xg&WRZl1`NIO}zpY`Nm& zh=hyaJwhBU47W>r2O!~T$G#G)Wjh<6(jik#p*20QHo&xKkO0>{JmH3W4w~`{ztkV& zTjxG{n#>My)e;|bq}^c*9MWWRQpz^FhF@(dvhS@p$4N8#5gUf|rR8N{mk?HP%P)aQ z%C?(^A=URH7a@{9vhu2SEeEoTYE6o=z8KPATzgqsUbca_(kOahU2yF;A3&5dYS;3h zy8YG#lB7&i}BFsjmRA+)Yy#T9P1kq~ zH`friSS59=p{artX&!8r44>`S-%FewQ9EQ2Snt|WDZ|LgDaJ@mqXRlq=3Kw-_wVFg zDG~f_A`c|~Ve0pn1^y(1%XVoqi=b{owV1@pt5a(By2W>s$qNJY74{apr2!R^(af^2 z7Le?H1m)jJ+Aycq+F1ul1k0}X)2F%BY(}&@*sw; zn*}0)8eENaVUP+Bpt-!?$AVv)Zoex}Kke5P6(P$vJUq7n6YLZ74$hiY-U*-yMY^DT(2!g60^5+z@Cq=q-lW8-$ft~8~nYOr9 z)Tv}JD*UIjqy2+2pPH7puT0GZ{iteh56d~aK}yj!qoTG9i=smOoKxP80CRfnH4f1y z%;G3fM&Kl3_HVN5E4SrDQ0sSZQZ1K@tpWA|Om~o$&!ljs5u!M(YDiggKrK5FQYazs zlG|SdTHuY4-%=XHHSVBGk2aylI7k6%kp0c3L4}rfEYi3$?-otpeWtVjna3+tl#nB{%DUwJ~_u z*c=#VFK)yW%w*n^H(f}mI!1b^A2B(IlkTeGGGg-#)BDASTlcg%_OND&aBF zgC(y<%dfg|3W_J%Rj2NY733%iV0NJ?a=~w86(!^L@Uga^rt^6A{HK6Zq)0e0f|?eB za(w1!%}_GT42PTd_W2wkr6S6zq`vsW^NmoQW>FSta5iwc4P4(0*v37X}5tpfg-7eLB@ z>Q(T&tGSYD_>PSiVpb_YZSH}GV^ZpAm7>K0fj(KrZ#rfn_nCnugxDm?h6C0%ifia^ zad$&%^iTdduZzlDMdX$}vaZ~3I=h9iMg?4?y=?-cbHBIi=;{r3ldb9DaNyPCp=;=l z3@ny@VTBwv!+IIJcFZ6NQP%E=U8cX@i(OazFHJ8&M)LST{^Z@<*G))>p&G;On#>-A zoGd~VBZ-Ksu2Gbxr$m$1qSGWB7WQ}O;(=^Dm%DJL!X=;uFioClA38WFpE?fMO~9Hi z zi6oRY4TJ@}oIgm{;V3b@0cr3PRMjIR*TMw|Ku-o_gt`&7%{VqHeuB<6)BLTpuzh#m zVKc&;LgF=qS_7ZbEy_|OVlWT5h~db=#C@`gLEI?(XFj}e^M0ln+a75Gr5&jHsI`WP zkunrvkN;|yY+g5aQ`*}zWYc|MsTqQ?Q8;m;P5v4PO$7gfG`JHEG@u08Ai|NYwPFpU z${>!fMi}FE4#dP|EgiRWAt^DTS0WnD6seGG;2rHFHqzTDGK7RkYA0+Fje2T@CD6_% zFYXEZ)%OPZzIm)EcZ+U5NG8uBLYT(a2e#7(KB9h=m451c%0-$kI;j{XK2nr_+Yf;k zR_nO-$3>u;EoZZ3@hdA}TA7fprruX~Zvr`8oHJLELmEuro*C6R>*(ETML@_fw3!)D zE}^<2aNxryXiKkfUC>V*JT3Rfsin>~av6Im$Lfg!meU+$BzV0wYhjthnF}(BQz|eu 
zF^@T&vF7OBE`KWj)Q9~g-LvL7Vb7pgdAo9dTH4F%g-@KNHGIujWjb-*m(n8O`-8#9-7h=YzPM6-{;TdKH=BU9WN9B)U1Y@^iG-If%6$B{w?2|Mv%D49^c%<8 z1ChR;AAfnxrlZs5QE<;c! zDRL%C3y1y|KD4ArH*s_CxE&S=&BzzV_4oDlz4mArlBvvOjAXB+wSQ>@E?-&gn}Bzm zptI=Yo(tmpQuhr&=>Tdrtjc4A?K$RO7_ZD@S zX-k|aQFnoYe=TyLWEZ!=RYJluYJ{je+9xL6A)xf(p1qg9#%*hN?2hTP2^KV$@c!HL zJ)_OC`qWdr`DR)v{*sfAtirM=5ld< zF(X`leD@qpVLseU4u6HFUDcXU>OMc!YfN<2ra7J1t2X?}-#EL$V3qpHY_9b8a zQ#VaFWwuibmYriuQ?a3+D_qB@o~DL6nuwoJMZiRar~ZQl^7}oG(e2mDZN^J#pPGsd z;nyUJAGqg1##J^AB__ChtEo6Y-!gBfxRRqyG0OHuL7f}wM-J{eD0f_y8~y&zk%~+P zW;3X{sn^$2{_?X@!=La|)wCYUD6}v0$H=nf)>TMg&jyjUmSDs>5RuQUuAu#Zl5>R_ zBI0{}4z_9n)Wc03se15r1)aTJ%@N}IT~(g?Ww*?XJR?q94NPp0RmQvd`uXLCfzvkj zMi1u5kmzujHZ%16#1c|1K|&y-64Cy@OZywZwd5bo`M(6mtHHa6&JfdDUtk_RM$ZXWy)-SlWS_33cKZAn0Z$x z(W)-6A!K|gcdtu)1oTjq&1SX5$+};|ey8^r-PG(W8?bAOmv@-<$GZyYXvEI2>_Z&}Blf2ddSeeT|a_)VC8K8|nsiEq58c|h3)Tq^K1 zJ=dkpzo1suT|MNFBn9!1zy~+92~FNQ(uK87z`00z*nGP#fhw$FONpe8Kmrgxh%|Vw z@5!}NlyZG@>q9}WYjm^k#e>AGX~?Q&*pHED>QVy$kZs++H__vV7C#fG_glTyt@S0X z&DKo@sYrC&JfpKhefT_+$#nPE&oV#|8RQQAmQfhkEuFb++0lnjL_RaRF zYexUFYT80^JKwVt0rYl>4nHhcbo?xD?&`wv0O4VzX;xmc!~e_J37s=u3G+`|x~lSh zl-gL#3(i#&95c@^f-o|~ij99~vGRAwuBU?Dlv-=`K(dN>^9_XhYMaF%0m6QME;h2$ zm|%TAMD5x>6fk|@L|)ob8SQjUIsn8>NPg`@?@D)LN=#Dk(;n(!faz`sWIdl)eUU%A z7vI*xwXxI)2%Ezt={|V?0Bpw57_21eo3^d9rki1i^ou5(Iqlef zy5}_&TfQ1DmQ-Fbt&Gt+`*d#- zpX!K7&+nMV4MDiDFemY5_vQq1N}O>bwE_4I1&-1K&#g5PS?vj-lN+bu*CTqdl44{9 z5W}plwx<6LZJowbVp4WzJtx6lxprp%U`H3mY{5uXZa$?<0o6SAoCnsyi;KycH?R70-Huo9_{OZ+3;`I!=5cAc{ivY+ z=p`q@X8&r3GpTc0WXul`R{RhJ)a+Ku3>?VYHVMN?I7(0eX)q8jC}h-0qQDz2+1eSY zk|{xsa_0gQe3288Ry_S{S`_sJn2RD)V5FlrI_zqwzfY6TIP3B+%n|a(k=i>A54m4> z$IEmIe*X3T?ydABQ!bQ#1TM-ajTr%$N)b66mr`wqCy+})869a^(gZ>zuP(-e3KS?P zXO;wzLL(7&6RLI*6j0N3G>8yavyOu+$E91&u$*VbG(zbafwI|Dy zJ+Abe2D&}ZN26!Mr7P9Zt2o#qaddU4;7m}^AC1*h{&0nP=4ISZ?OGENs zSGe|(Q0z*_}5+I}PvH|JJ?Z z!-e~5I4x!^0FfcQKVq|h8f=!^X6EL!Y19HOdXfn$g0C2PgHUe{N*dQXTqVMehk!9@ zAGG4LbaB-uMr9S|-g*lMaYC-v`*vb<%7nTqbJ#8y5JY+}5wIj&fqf4yj 
z>m7P)RStJ#ceZx8WbsC(Dj-QeaE-V5q)zgUfr&>@rU*HKI&ubg40VPq(@p}P+P@rt zWpbpS!TI1+k!J4+&zcTdJJX=>92Y`dE9b1*&dzW`b&53|6jO>$**2L9%k7Zvi%l~G zrSZ5e=ZdDA4&n`w#YoODz3|LV<$nzkG8fw>lWLtNBJR}u%!hWHw<}|r0zT+>EdS!k zv=^~s1m)V=wM7V5a0*45>tE$*GTVShEwA>bqeRrbg|Jeh>W3qlsUyYZw^O;NTDBc(t#(9>BCH3)FXkTZSkd<4H zid}MDy5n0_fISC1e0kZ}MDsfHA#3_$*y0*x?HUQdIes(C$+}DqLIx0y42cQZ$*sFl zmbZw+z$#rNQ-potzOm(1|A5Z5p_y674&S%V#a>BX{S;2iMl_uL=af#D>`3i`0mz~N z=w6cW!c{PTpxrd(qbI+l126)E$uECR8Y@pb_TxavCfG}7UQH0}_=J>gS$R}DfYqWa zJpl;pM9FcWxM1e}rTIXJl!A~m(|*ME6kW^W`zH^^I^n1YWzN0b3}Mb$k}Lq~B+7hx z%w-Bpn3;**#!4=JAOaC{+xARFhaDRr6wV{;&hL;Q?68NIsn`FEhc_;2G5ew993F)z zbwc1m!t#{>M_VgIH|^Ss8AP<$eY(8umq0IFx61)RCIm9LOm0;VF^X!fIZJxEK;yWD z-1q~1FJL0!)e)BH6kMc^y!k~FM6#Une*w3fy!n2n0*!Q_d=KojmTL{WaXn_veNQ$LiH#oJh}iUs68}*9y7x+|;)K(y3Wm5qpV9JFA z96bWh9OW%d@J^m)u)IXBIOV_x2B<*sO!*RdOLU8Je2z(P0q@mv+2Scv)Tti!wm#UuVzJpPN+?i#rE^n0?r2p)G(@^&ZX0ias~ z(45X)7st3(!3b*G!fIi3Ynx8daT18lB2=0GpwTuvbo4&BUqv<&ZUHFm0km9peD~K6 zg0-yfa{q6I*w;Q|6a9&aiGl`A8!SEkvrMvzSO>2Ft~*u#j5n?kN)9iBC>0nHHWy;5 zj+0ygSTaDEmHvCoOC(!l>JpyRF}u_{Sdqj-!h^pk{Q(HOtYkpL++(}p!SxQJ-*>vK zcj0yr9P>tlW3o^8CZIQ6f4x0(Xpo0{V)mcj>x(Yk&7eehDTkM+hmsNh)caE#Frq~l8y=`g@b2y@k>CMab92UM4WtYx+K6}4m1s(- zwh#`XO630}y}3=dNP?7eCMc2je9=mKI1Fa?!Naj{_IcEuAPvJ2{`J4Ejt@U9RP<+n zEON)jPbqb40klYc)vv__ZfM|q5gu=uII78Gp zcGNDLH#1g4x&(g}SRD}q4&oM{f9tHhJK&e-MfMixPq!Oh#3uO+kD~0FmuHRx3rK3U zqNg*~%)KQAKlS-FuFiQit|0Od1H{%3(JO9bW_M>e34>6-(V(*5cw@SZuUO$+?b3vr z=|j}NC4L)Ds;xC?a?<2{s(S6hju2G5MB7n&79-_)=D6G~4-yj|vPoSP&aIys_TakoAQ8O#eiKHZIKCn@Ao+7AX0jk9V5E#sm zWHDof53NOLxaAlL**gvtriZM_?b^<_-^WbyQ;&TmpH9qH@kwZhl>8HxPEVWSn!bkj ztoaBgX3tH26HQO;_5TGUS_1aqx*+0Z+`g#E4+G>+^g*{2ogTR-Ut*2}-Ic2eyyKNW z8s2)f{o*z8$)zO(l) zkSe*>i+NEHsH$M#dvLJNutB91WHzxM7JTj;P10%NS@dp7m;!;>nJG# z0Cc9vF60w(()sHToCqFIuzI2-y!7#5QY|QNqAiZQb=z!d)()#Q)$drSa+*XjNVI`! 
zI17c8F<95Pi(2x9VT=WNg~aJ!k4{kux=DV^bLDj+q)r371F)nYxn)9S>_Eo{HZ6J0 z1uip4w1aABDh!32i?igabd|ckeuwkGUdIIMd4LjDOB`ILN|52dG3Tsg4i09nYyw2h zgNeejis9VD^7(h(x-}LWL2O?{6X0{wrPM+5@4-_3=wW=6!|`D>JwE^ z07ccXjt0jpX9qkdP7Nf#rs>Py>aWb7t-#}6-a9Eq9#)S4wSN7BmfFI;4?47qT7A^ zuOk8iM`vP6EZ^Mi11wCNAtEhlz-#M{+HnaN1$<|U8l`5j-_a#ovE`ED*ZPgZK_7m9 zSWW+=ue*8;a9FRYBf32-U7dIdACHx9O+h~z54o(2gZi#42p=#qJ2=%mdzPIV09CiQNXsuXmkbRU=Py+`+%h5vW+?aMY;Ey~3#0 zz#@^y!LH+z?2i7GLAH9{WP5ck8ZXG2I`DO&cQ>PW1Lo_qi+$|v%KSy<1I>+@CWyQ! z@eD|RU&-M*u^5aK!xi2EOJmihnikJbfJkjhZ5{4j)7n)E0xF=X3;6i!1vs8y1hT3G z=rc1_YlH3`t6JbK`yDOj;|LbZ{e7GfF2ORnCum9&=X)K{QFC>{&0${4Dl@pj`AA6M zYK+Z$!Jw6IjJzI)HP26a>WLO$;lIzh1TEVK1OzaA#|}|`GyvMBhf@#!Vwbuq^LaMV z>4XmD`BtxG3r!@JXU%Bc(nJnFOOqkp1wI#MkMGrvR5`5ku&Se1e5pICkb$kZ?MG#2 z%6H&F4?WmDbHd|$mCFboz}LV#*};qXN(Bx*w=RU@b9Qj||HsW4=_|DSLYVn@i8Cc0 zcyZuEBJ7+)8SbGfNR)9uf*Bh~1Jp4*c)AQIyvf#U`E5h^XTNYC+8jf5v@O@S*mey? z@rq{xWmc)Ufbq_OC12K3w_oAe%;Mryw&cGot7BD19QzP(?H17ld5ZTjUQc~o->ekg zJ&yJNE(%=eI*D_0?>W7>bXiq5U-50&4?TeP69Iuq#JzlRAUXL>sGl!E`MPxq&%?C( z_yiIYHGW3j0;?KGFSCFdU6Q`tRNm{Pv0sokLPtFghs}FgKdO%a84y4JwJ6EnU=#Pd z{k&lWzn-xd-}`vQq31j>>$1b3(wqyJCmT7n|K6{j(H=ukSMgfQvD%iC>(}bPxlD|7 z;>zw>kg{$hQrt~wg^O?mtE6Nq8r!7{-Fbq}HKzVYi2?~!5IYXL07N6$btJTIxxW@$ zvDCE8kaMKbg{wJ~C__NQ`PpWqK@OEb2M_rqov}S-!X>aZa-*Ig=+KP?O!wWPVt`76 zFLgzG4t{Ov))%2gkGtaeB*zmCR~JF1TVi7qsHO|6_JD6;d=1+!0c>cp>W9HDs4nnt zaM>REy~Sb`k^&s2>6*`?0!37Rw)EC&QFeKLh1FuegyR;!!e22LL(u@?%XLuTAkUVv z>h2NW`$!A1;#ZkD0;-TwOF*AYpWFx)26J(M z0xA4K0;?O*D$#L4xMWE3-ae4xyA?U9U1SZR_a1|`>uoH&sNv=g=DD{-6qvY9D50tT z<2Gg8gFbw#fsbH4F&8f6g%0bbJTs1cp3-zPB@N=l@BxSuJp4S(`x$XY|747}V?bT2 zu&^F4h2AnHS480i$xpw!>9spD9w`b`?~u0O*HUz@8hK;Kr1I+9pYv4IKWWHCem)iq zCsN-7pGdD&CdPE8+>rnbl;QIed&TfQkI>7&K@$xj!C@Lj`iO8uYk({%2qFU92Ge|a zexJH?+*)IR3#F`dCIbzJk71;kl}y?U7PaMX@}o~u4!oyd9jicnhE3votE$I`b8ZY=JPmcAd7~ z;Cu|UCBsKx;}KBfDzj#(;KHK+bXjb76)R6sGpPJzU9saAiRkvBS43~l!!Tb;KI(aM z`7-+}@6cevjH$~e6z{?W%K<}*gZ1+k5~3NzY+Vr~A-BkgcazuX+0b2gFZ}Dew)lO_ 
zhUWgLE0a$4Q-8jkvpI35{G7|9KgI8Zn-;c4L_O$|`*{CUwCSl$8c`2qp9%hkoc$K% z6KQ1JTR;qI{rucUB1)Dirddas_wPtxPqfAwg^1`j%Fw{B4Ay6~Wcz=eS4V*LYmU76 za~JmvX+0f4s_Q1X^j%DS9*Ycoan)q%7f=?XyS2g2>=VB80Ih^Ys}D&j-nLMa?=5e! z9Ek-E<$^&6JAb>UC$}I57fjF@ywpLMNN2`zjnVovRA=j?gCdF9lfUjxdF@LEHpTSz zg@yEozi7V`_Z&IbB+~O`O@rD02YA-orId|YyT+lqxU}C%_cpB~ZRo7UOwn_NFimWF z?|>;|7~hxE(B>YGtK{Gxq8Vm+XimxqpPhusKri&L%vsoWpd7*n>YYC}Xo(y?dwV40 zuKVrjp3EzkU#pVFKmiObNtH3(*iN}Ug6I?0n`PN@A7gFVIJt5`CRzgb%Hte3c9|$5 z5xh&DPuHnW>p{O1^R1RZWrgl|QzG@u0WVP8eh%SWZT7>2{a`_$;@SJ%rpciC5^q}9 zyRR=n?dX5T+M1984o&gfQn)tJ`aDR63%0*51&++r3B?b)fng??t-NLFn&&q3`?pg9 zkCPapAVIGCpZXmbt?j5h6XVkzd8MB-o1@XX8n;8vKtKopvj}A(3Y`SE2ZAOgsH^~} zbgkOVZ>J|@A-slXwQ~-7ks`0%-*2DFewV2X(9|ikk{D>~^`t);8Xa#?N!Dgg9GgQw zA54A`(YyuQ)PHs?4l2A;3?$rdHD5pSHypL3vSBAAx=puR{2o5i}O%- z=dl0n+v<$Aa#r%Kywoy5|J$y_gf4RdR*#dCPtXi0?{sFM4!%eMW4V`~7pirDC+ADO z-I00ZJHLEJ%oUKPqFHo`lzBybUa0Lk!Vu}PVjP>NYq(84o9RflzQ`LiM(09@BFlaV z>qQTrwQucG3I%ppL<}I99MEbv#=m*f1shTipY?h5<@rM|c`;A{hvr>!MxLcB+Wuy- zb}8xJy>zwteG3W2(|7L?Kmi<~o1g&u*|XP7h(2vEWpmr7m1AY6XQqKns{g?kw)|=h z^`9>HbLD7ep)5dcG=A>@uAWN-z6OpGi8W1BimKCK-oN^b|HTvBH2}jM*=<_v<3uAE zVqXT>%t#;N{Ok7dwvf>zHwEJ==GzPDm4vFbq{hX@sEjR@1nTTKP1wQ7DfX&sM=Dxa z=!{@eaTea5)o-v*D6Z86?&|U|WsO)8JsL-gmHG-y9J+&iwY|QayXGH5bxBl9 z#1y$D!MjvmYJ;!_vuwIT0j3X#r(ks@E_uOtv@|MSdgnez&<>By8DNPdG{ zcK!Z&5IL*_XXfxht6@uop?wUXaJ;Xl4|C|1?ZfZ33#ue;1jWlO4bj=S&1Va)*Y~J@ zA3<0##86EGP4Gd4$`|bjzFjiD$;T2 zH9~+GLUUByQHtKsTR@sflWyoXFjVQ%f=VFNWJqYCgx|Rd&VF}4zw?KDgybgo+;iUZ zmgjvQ##5zPy%ghs-aBH=jYXW?73ZQSVXp4hKe%2qk7;T>Y=A(q<FIY1+lSQAhk5s~ITpo|+y zd6qA$ekAd^blD|Sl3z?8fvV##(62!J;-jgQ^l6>-o)hLA{{sFhR~mtJpzFcONOS&k zx^R~>{Odt34PaC|sBsy-nBI`U;<(%vpM#hvWLBoD3&dHxM5$Sl9_^Mg@pu`#Y zjbLOrI@Yo~$_mSgORw)X&*Byrj4!tz$X<^|sZ8NrU;g_?V5aVt9q-v+=HA?f%2pS^ z1%#5YNuZ4HKfw8HagcejNv040?{5rI*iwq7sf~1j$X91~G13oocWWMhyA5R%6Dy{( zkChL~CF!g*U>3CcZPS2x&d-3K=i&pfvcYiPXGpnYY7JS7k7ngHamgbT6Kw&~KJk|< z1&LA3j>b!XT%_aS88H!$ZcwUJNc@h#IL$mPk-&+6z-03@i<~V0R5+K1a4Llv1q++E#~2&! 
zB^zX&bN5(9x8nO#T-nIHZRat@pM5@JAKnB}`5$&`CUmrootsBWJ&f;>v8EC^fyXdmgVc!+CstGZ{9{$>|$ z27yz$z+NgbcJ{#O^r>XUsvUFC&TEt9I&+$-c2vkou_vuJ#rORF-KLZ^?|dFf3J;%_ zqspSZ{IJvUJ7I=>SPi}!x2v;x;`h9|ooy|RO`ywC9=D9WkeW=qtWo}tg-Rgn5i2!g zLX;qg)DBzYcWzqBeV!Luzxh`GxayX6O9GLv6y^b^z+2d&v4? zG=IT0QLx*#M2s!8`jglvc7C{}13SJF1v1FRCyQJkI4X^MVs;5UhIXz(PH=u%Oo6*| zO+OsLmzY$_kf`k`bqLal7t^PPSm&@mCXRHdAy^Ty7r4O+PpcQTfKR=AbFh?vonRa? z;XS$0W$qBJ8?3EWtG=vL-2j+1bYqt@al#vC$s^T(41QWcZ-%N)Xjz@ebvtv=Y&V!y zbIm*W3|?iX3$Yzn6etyc;cQukp`nJhq@giRg1V{eW-N4Cx%bzj1u9FX4zV}Pr4MPp zRn7VwE_hT(;p~@nv-V7(E;K&u^DE}t7HdPcaGj{!cMC@ObA{7$uk>0{i>p@V!pDHL z0SH<2LkB&a7Ap1SMhSHy*4^)x;26;Pw`hH*|F$*GwScRk5T5||Yflw04siDNdD~0U z>B%uQ$J>LEJ*JEyAV`45K=cC356Z*Q;tohX-HMd`g~Bu7WbI@zd5%{svthZlw!n~I zPYdN?xILl!a6@fbs~Syam+hKu*B|Sd9H~J$8e$BsJWw7ky%*`vx6AD-8DDoVJ;oUy zT$_d={@ z)1I`Ki^ce`uPMq0hg?ulZIGL1QLm?=)V5{vI6C-jn)8i^PuYe zI-(^Lunjnh<#RUpNdP9;OQI}YbFA9xI*nEbbh7Bz8a|Fr-?l#>vnp%OsyEwK>%rZj z7tEG!i}{fwZf`dpoa7bOXFSqx#LYFvhcgv=c0Zee<47QWG~y0j>48%4Zhqil{NT`U zYsZhc92;?+dAgn;<&kaRBUWyoOEp2^Xs=bweG1#ROj!RnGNaV{58Ek`i+Iyy)V1WW9va(q7g0i zmJX~-3gLh&WDVV6*i-83(C1KWzg@pB>U4SlZ|loH36zoUGp9`Q^9s}x|CJO7(L_eG z>k|MJDzF|3Doh9rCc99jnQ;p9sp*mC82{5Dwc2KqR|Wd!cuBF#Tj0LO}gsr5g zUPhJZ`zkduw0R5a%ZbNUXWeM;mc_nIq#;c38Nm~BZ^i2LNo0iIyixvy2WHLFA`mym zptg6RI-hk}IF+4QSG{WTHnI>tJ5P`Q`OJ#zcV9I7HpPM5B$%RRcad*MMR1HJI89zJ zY~~_q!fxbR(6b3Dcl0>stLaH@B@ZwM{TT05QASTDtf#HVWsw~KEA$VM!HZKF;nUIS z)_(L)MCXwl2PUv3+AWc{v;Chf9!=sk;ipfk&;vS0L?>IZZk-)x1U5u8ngLCv^mQd& z76M!we@u*hBPOh%Sw?Zc+HIK> zzs*i!+{E_%D#Ft05exKpyHN5biuUbnDEPCt zwadyl|C;ynv|ZYFJSjP$(GDnSZVpLtdVdNZ)x$n78vvWQ6p+dT)_D6=3K0Deav?`A zxM`BcfVha2c|PTvd-nOXg%&!T?&jfGU#?_Am(}Y0RVUCHH!wL)_isXt?3M~|_I@TV8EEJ0&Qp<$pI)V|I+ZN*VE9 z3BK~A*4L!c&v;m#>VtHAVkuB)`L*i&%)-o^A@h8(sNLpZQqrJ5J$v$8hMi+mMzX8f znua4WbQLiK5td1P^!fD!hUZxVKXpjx`PJdnXG)2Ttej}nuaUn>i5a8Iw8~EZz~fx| zEm-)Tvj$sDuMj#4ih9j}R*d)`>KvW=+rQYtAgOx9UMfYbJq4E?s!1+TQqN zI9RkK?YhaPxt#TfwraHT{;fH-pA_FH=7wvb|PYAUtwA{4ubzxBCjh?F9VvUp3W4-u`^Q^XD=C8il#Db4BTPQz~~T 
z6Jzgvv@IyjYx!HRAE50Tl6#!Sh}GDDkw=GQj+Oh`^wk8u7IN@Vuo=M4zEN&aaI9db z1jZkVW%8Ec)||rV?^=vg495D?U9Hzn`;f^9g)~foqWNI5$xxmsxa@%i3#nW;&P&c;o zxtsdHlk4FZ`426lnSTWqDE(fk?aDkh!DumAQhuQ z%0>1V&8+2D*jxYSd~Oun8R{h9sI?8&79p8;85bwRwdsV&(FGIjxQqtpdDcTHFQ@sI zGvEcjBxBn%XFplb_-J#)8dK)bxc(7q(mSscNg_TRk+&Vce$DH6Msx=j1LD|BO7n^O zcHTSZJPQ=>p-mv`@p6`}yztCZjKr(#<8Sbne(Sh6Ldf zjGIf>xQ1(^d@h`)AFvuNc^26}55FtB?QFQS(=pm=|Ilszi3(jaW|)pVGQ>~!c2HRM z?(K*_mrG%)^TWJi<8|vf4H#GQ?*-otm@1%!siy$f`Gs$d;UR8FB3ETW=Is{-+=3nuX81TeXFe% zqAISrWwJHzp}*EzifU##^*RKakK~E*Xkh7GcaQhh&FJQ4pv)^}gEf`!fwgZ#>g!Cr zB%gep9srmXE4>4A;schp8~~70Aupq&mgc9K0*-Mrq?&cRon>;jP}gj=8}5-*v(WtI zM`R^>fSJFk9ca81hIHg-cC5w%?I28{oKx!WF{(-$Pjt2`lXU_sMSXGWG{2>Z>rUp7 zhYq{LuT)MBA6l{6aIj8cw7-SuM*0enXT+sPqHs!W-G9DVSVTltcYddHtk(f&PDRRD zS|?Mcl6XBBSoVMZeL?KvdxadAtdIVH}BjW8E<-|>RP z+1W!@HLDqjWO1xJq#()qauZMO^&yoyML55x%&1+b&3!11NHPrSRR`JTTef2@4FkRX z|6tlCXdgI=Sv*%K*EsV9CEI}8-bkKk*}j{fzTxLEsQ<}HKgir7(qi||ZHHGozaJ#h z^)2RHbeE5F5PdG`LjUcuf5)lD%R-9dLU|HAC7DXhjM$Otjb`ol+5zofh%^$ zs79>rn|f&lB3&I<{Q! 
z^U%PiSfunJBog!rJDJ=08#BtPXU0`cZeEh>e!OKI zEQy)QqOmP6g%4bWWIY`R>zvt?N4T^z>D$5!>4(n<&#oz?w0QOtto@#&L{7^lnxKUR z2VHe}oK4T25{+AJ>KcVz${O?QFcAx*0Kk4pZ_9 zGs15zvY@F#GicN1U-llI93{UDV^`+pAGEL!Cb7RT1ytnbG(Gx8w|neDY3z2nmd~vx ztQ{)%pdmp_iNB3%#FxJ*;u^(ukp}*Kex2hFW2gms#g;t_382Lt{3BfyCo`kxQOF=y zeyNp^5d|1FI zIRM(YF~)F0mKod8GJaL#iJJG$qnhl;>qu_yF5?*cJPCEJ9e9S*huq|KrQpzbUwg%OOnVFoKRXGVr@jvanoiuFs$ZAnRpILyF z0z^KcHMTJr31pgGdSzI!56tKXv!gt3t-BJq15drSFZuy_g6}2X12oXN-$5JX?kqhu6jWWN)gFe6>Cv^Y%O~ps1vLZd&B*oVX#OD zK}agA{_1Dq`tKEXkn#mnl=h3K5kHyt3Od6dUd`=#Gcb3|Iwmeri3y>m6^`$4LmG1 zVH^?q&|4r!Fuyd$Xc&!n<8zS5)al#v;gZ&Ah%Om1-XNv`H8__p9s=N1>);(avNrYt zJ?EO5$sG+6J79Rn*N+3jsOrN~Gm~|N={;GomnBmjljI#0nXTqyP0@*K93z*9TBlV)`B(+0gMC62~bxZO7qpWE=IOXgXl<`CyVvhGVAO6|IN&5TtM>LrQ6o?gM5!%euA=K899>F@ff4;(rtxtzydi+U4K^n z+d>Vk*7-T&OL!g8Ln1JjCJ{{!E;F>KM-?9deYSl=F}VsEFM{mFxY)!&RyvqAC?zdunYRFY|C_^l9_XkECT}R^&4$187EydT=y-%E*pP zSYs_Vh7N9seaB-e`QMxT{1}AjHE}!lN#8us%V6psFvTXQ8A&9LSoN6Bs3zhJQtQqE z>~GRFweL=M(uZY5;Tx6;>ebzF9eUr}F~(Go*6OIPz-+|%PO(icxAS~nUFkzO$!YeMjKHJl)1=*iUovNuPW$_GLOM6T zqOU@0NZWqhHy-B=K?hKG$HGcVpfq7ngtSsuYQ`WVEpP#ucL zO~3o?yDJIlAh<}K31qP@Q=jKwdU&iGyX@n%UHKXeag(GS(>|>Ca&jaqnQDLg^`Xe1 z0Wm8xQN;EWol7{M{?&(jqnGrNbOZ01Bez7S!Y2R}?-*SE_kl3iG zOvf0V5f zaNTk3g>{n-iT2y{;NU}!U|>PNq~6LOBjEs!yc5x<={*ndxxU54#SLFId$B#IKo0pk z@Z|qeK`O|3`WR*x+X=5ZLh+zFZ1~{?lNINw%lFI>}3=M8h z8z4aNlxTz?e4nTLsv!Gh3Rs`1_nG(|Lx^>mH|KURwxs0lXt5sYV&6|uR{rMFr~&!> z%06Ey5?wz(blrvpJ&i-Q*BIr-bJ(BhW7-t@EGSesB3OnXjzs+J9-jY-cikIi!)xJhs zJc0-SSyrEW5Ecqre|y041PKd0``gDJ)o23=4W!)abrjt2FCZ;+O(zMt@c=KY$ivG_v-GA;sB2AqFLshfZpvY zl_r)^2X=YTKc$;^)Yw06g*?=DZ2&9?%!xbJB=Rc%@$ii{|PL1taX`G^^i z-^e)ZPE_!tf5QU2p?7K`ha~`HAqOGcma^RsC{-D|0mC2%PPZNj4ay*no6AbHQjQ*7 zl|+_p)n%r(+`0VPD{f;ZzU7=QZ`;->4)zR~$laZHTgl6zqgyiDTWII-KAY$*#;$sA z!@?1|T^6M=(;8LZTvm212*Kk`efJ2Ih!$37dm+=I^aon_rP8yw7s%fJ;5r?Z_4Ywa(h`_h#vSIZ@kGz0wW|g88 zCRH|XFZSn~G&rqx3HlKLWUmsn=_u$22nM+zDvrLN=P9pU13&Y(f z=zW8@pFE1m{|V4E_)BpBe2Q*u+fVxO_^>i9_AL%}PZ*N@uz9(q?l0sXkMrO3!{;As 
zI|a{oVpQGdGXWYcdp5cSgKiT55Q4#*C*8S`u?1E`7wO0i;BEzTp?C!~1QW(%yKpL`#)UhR z8a~ZZNNdQi=I?M+%^Aw02OGpZo7|@BZonYvA4yppI|n~74bA;IOXKD#t40{$JY}>@ zU%Cw5dc_9~W#IEvNy{!>xIhSVt$h)iQ0i#Z1(-wQyS;n>3wTK~fj70zKT`!%$YJ6n zRE+XQ&cX~UXf5qwy1 z0(KYVp1sWbm3q?l3Pa)V$r+}f4t4g-H{2er?`e~rgr$_dUnJy3d?K=8k+qEIXnoW^ zK+2R$_mVp>~NW26Oc(a;wvVA#|4f9TjkJz-q#4R zEN5=634Yz-Pp>cojdHwd^1lAks*d$rD^0LmOPjD)uO#dPNftzY3f_-6xUB|2#g51Z z-Hma8;u*t1y?*}HA&oMOz~Oh6G|*xtj`Ct2WG4i0^V;#gnb$a*8GICvv%yv;-5x_L zX0InltOgTs+d{B8xN+JG&H5tz@gR?4<(CH!*D4An;8!am(T_M79MeA(B!F_=_+z-6 zuC)Yn6%s?{U!=nNq^TpR6tAI`df-B8k*)*!E3T#eb8obPsX<(P6R}gCYNj^o`_DKL z`(HHvdGX>g{=v}O=TF7IsPSG5PZ-;h8!jFi8@Ao`8uBi_UQs(H?Kn7G?0M*^Fu%O` zkH4qz+`WJQ$^9od{AA|tuF}1{Y1fb{?FK^TD3!T#M}M|Tn;zaPgLPFWWU{)pxd(^h z&w$~65OonCYh7Y{(hI~#RY4~O7*RbrqOc5#Rx+n&xy??B)cm#!B*6EK7}iJu)l`fH zlnx71Ro)tIQ~?|X!Uh6xv2cG#9D*N0uKLH%=m80Ca_!?)9TIE=m%XL{4Jfg8?nm$+ zQyZ}0WB|QsL@}vxra@G}JaFXfEFVIXsM@!GDe%MD(z}{!mCHAGfM?STP!9{AItRys zJTWPpCFQB#f~P1Ku^l7Bpf!%DSN6RwB%|Znkq~(kMiby&LAp!WTIwJCJ$~+B9S4*DMpfFS4>x4Rf2HIXv{&{^dCrv|dq2zq4M^ zYd@8VH34lA7fn>hZbmr2u>y>>g$93}m`YqfZkbAe8Wl;ZNOl=Vn7R57Ye|;<3)HVb zGK>!g0Omtb<{@mJN{EFXxsuyv2j9}6RBg2X-T}0Lzmo>e(Hg{lDgQj26kmuDE~h530>e9tVi1}om{N{KI?wN>`1 zey=-T>z6*P{<20JspjiAUKKvl^}6ESXr;?^SM%M{$$^j?=verM$SrOL(b1IwvDZP}tU3q*KtkMKH_}fVzCNXq^52v zKo!*}_sv_c6M$+3jJfk1XT>=XFj;pa*@`450l0D(HLN91%EgB%l7!4=uR{|K+Kyo7 z5sWU#kaCd2F{(ep?}bO}wW`2r-6j+3DXqQ2;t0JexV!25SP;+n0dv%f^1gQG)7%a> zvUyj14;+)N6b2q{_J9`Ld%y)uBeoHmTBPGhHn1J3$YU>JPeAs4t~5!|l1vOvX(#J2 z{z4OjgYGq0D=_)7v+J46Z~MB9o4qlWPY0frYY0>>$hn25tZ@XYSkx(v@>;jcF3a^i zX%~4o+J@<5d*ZLd5O^)Wf^DTgQS%@ZjP4Ag6V+?HxV_{8e!!6k0kD z^5{IRCsmb6MSusMWdr4#&1NM~R*>sL?%jY}%IgInI|dKf(^cs&-zufUTz`QSPT0E_ zUphB#Q(t8-5BojCu!&I&g_nCKD93`aZ%b2d0#+nnXahma=5$nK&V8Bue*M}L^hK!H zyPa+Z%o{;+RQ+x8%EVP+__OgzBb$k~QDrknDmrueeNj@LygAU?bpxjD0;va~49SAq+8g;lrp?q? 
zLF>zdVgUt>e{FZCVU7B1LNN5U?E(i3T@GEh!6&*NE!j*X1AqGCg;=#DRP_OH2(=Vl z{gR`r3g2Xu(hA}n_-UE}U|(vahY)Zg4LKg=*c+5M=r!&_i1}P7PoUf%Tlu;~d{qd6 zB<)!vMyG%q=V8G2nr){0#fT)tADYNf& z>m-Rkkr-nW?#&f~e-Wqi%sH!4U@w@MpU`?gwg^${1pI^vnYp)Y3_ekVaaPw zvti291-ijQNtP)Dc(NKwLK>VP7IK(=i(HB{_q+O^C0Oup6nSgeFr2oL zlen0xOE?Ul;DBH!i2RK_93RLjf@Z%J$)YH6AYbP{e1W-H2as_-7f>D$`Px)a{hbt5 zeZ2Qiz3fpmOo9AuT-1hb;og0ayYjU7FM}%y-)eR+$nd`gGCw-{IwGvEfm)ji)M-ADYL zk|;aBO~aS$;)N0*`;0twoV>KdjRsNUz>kVPS>tJ*-@YaI>6Z%}frGHQ%hm-00Yqj#6(L%4_x3elXy@czP zQC{3n67J+6`cuI?eKRw8Ty?`aMfkLfdX||&d5r)GG?F4Z+07NjXjB`T!ssPa?keqf zs)H{Vg>?P0^dQ`@Tl=?xz9VrH*MaX;aNhv*+3ZhQsDgbiQwF||sN)02VDI?cMm0hK zupD!m2CVw4phh*&*wYK7l=zu6(;C;pYw6=%09EmadMG$!PR!w^UP-s?upB^gxXMA> zu>al-3f<8rVx-R9}$~ z1=H{rMNI@=rB8*?#g2)V9F-$(E1WF$fr~Hk||b2 z{$aa#z=j0pv_JlQ%qY+lvO&uGUqD@j^q-3=kg^`*YQ?dUkSelFY_>u@P6tE;7kx^a zF@86u8fxcGe_V+OeBb*h?qlGHQ{=8B?9|PI6b881#~frtqN_U;Y0gN3*JX8`!@b?n zO0=CdR0dx0(GplOm`+Ra0@x=@Mc>178S@8cn>f2R6I=~npb(ChFd`$nDol>#g}i~# zd`4-eEDd-I=-Z5v#e6o3|(F z0-`SVYpbg|Yj2JyG`_kfeM&UnEjl{-nCO`^Y2#i~K1g!J)_mcVEnN6?ep70?Rzr8e z>*#}39i_JNT1Ky)BBLI@eE&feHP~S_nmKY3Fkr-uc=i_F{T^TwK4zV9V2(UL&GFWq z-*JzR4-=`Bc%M3bDHzbN$m&Ki^1*)C@qjkeQyX-KzGhv*cY(GHerB0Dz$8Bp24`Uu z={INRA7yToy!ryjgPNP>WJ_-pT`Q2YfAwcVrZy(2)_*e1IR8RvUEM~GWzpdAmZ`2R zUEE$-Sy>`d>xGh=r{~<9Wl=Rkbe(E!3G=g#*7}W$GpW5(H*RMo0S!gm)?MDScRrUn zG=!{fY|!Uf4{8Ueru2P0Jw0au&5gf8qA_kq9lKmJB5zL&1o$>W`1&JF^lbHIDXXGu zp$0|Qw0`sSe4g7pa+sG_>^v-SrzkHVk(vq%>*zfJJWqS3Mt~1i;L}7Mc56(Izw5W}IyW8<4GHT`~ySs_jjzn+pgcRWG#)bMZ7mCXx zeDf_?^IfURg!Md0T`x4g2$Vs`m_6F%3ZyT{VRNHarB`1c$<&+VTUzr*l#34q`hYE@ z+y&H#k>)0h2MDT4>iE?bJTOA78lHotm%xJ+Q_k18pREZ!M z)tp=t0_JAp*NcT3#UTySew^0#j|N+SonTFM4Rn*^5iaCUaN(oixq)zJd{K=11>qNn zcF~p1*nETF%jaI+db>qy@OK(WM;MBQ_FqC%dxQqi|Ku+fJx?FY=t|cvr@uNU))FA= zBTkZ2mmM#(6p|JVY0Xua(+pf0t^mc>qs4vLf%Cl}FaYBPXLnv40-(KQ3I@OP9Cja< z=gwW`jZzB5KqH7z1yZ8~Gl?G32>b#IVW0C_{YR`O2SM-2U>UoATRD8Tc|B=1is5r#P`l9BorA?wE0AJI4WJnBJ9(Z&%O^mxo`S<|D zuiqw|KGqd>I}}tzADHp`v)5K?OSJ>(4#fcNN4yrYO}44>cBu88oN!$qtPCGVHTsTv 
z4t$;rS0ZI1o5>v_>~6iUIg`!Fb=x?WzI38tb25-fyjbg-dDS_M0zyzF zRb88OwPyMtR5JOt(FqW!0aw4p_o_iblH6t9xgYob5lP@NRcdOgLJyDjc3vv>&q%QN zO-u)4o3*hc=rP}0rl9r1GMxzYagnH#PT%1?Z2XQ=H$c!siA5fR*#ev)+`Jc6K=fAs zokhE;t;K0U{auSHuYm%Is=_!c3td$UY=;$u7ztmlZ(OMnJeA?e(AbzjD~E^He4bBi z*&X)%^fKr3ulO;b-P1zaC6vS_)h{*Z_qa{%1{5dznONL;bAmBgsHchy+>LZ1s5jGa z0BxzmUbWa$nVg(NLe_eVel{%;;jjOYc!;;q(*#st1`wqn-M@oK-LZK62ajishbVe1 z?tQsFqm1n^KtG=h5Jm^GgFE>2sJcSHx8Tz3@px)-_ScggS&|3TC;_CJR|SQ6qjXt? z{4J7BI(tkxo5kkqGEB*GTmXAoX3C2c8g!qyVo+FdUR+38>u)Mo%o%-%OTZ3wn$0GH z!^|?hF7y|uCKcErOGx`GF1(~5e`_zc#ZV&AYoOj(8RyZDc;_wTeibG_@-J$euIAlEkT=-Yt<`e}Ul zlFMbol!USt_LhBQ6V>$$3ryGbp!*?#zEIYAb-6A+^@%6K{;zW8PUHIly3#u6KGJKx zKi{T4$g59j?7^Z+RBiQMNw_rAy?eYb6Uz@D4@us@)pq+U!4&)GIB=H$ky2NI68Ldt zEg7ko;_3$c%1>3r<(m;EBMmR!FZQk1N@8vLej59f4ozfI8ZPtB^GHgjX1z0)b=i0a zPDOptJdmM_`qG@wAGw~He>}{rNbOU1bTI!+CN)e0$$mK}EBe_13Boz6zMr*Pr!6Nx z?v_2f7Zf$+7)1F=a))WXx1R9qUUikA&I+H9$Z9i|C&ihHz?wpZlA5nR%U;A`Ltf~s zM{kd=EFkD6pdQ#_9yp~z{f@`z#RH)Yam~EBA6pa9X3(h6-q$Scc5HZ*!HtumJSh{7 ztWU{P=!IL78@;JR;Wyy`aeQoMU~i+glZskg)TiC+jhONSDdTLn1@)xzUq=xI!D`2W1>}Rd#Pnm73W&_x#d@Di4D`?AUPlMoIYQpPiRy z=l)S{66glePiEwv0IXSzP4E`p-n7Z8&r0#y1#=>PrPunm5A&SxHGd&&BbH1lMuwac z=JmW(?30^XWjU$12PAEDXnz0--~e+dG9I#Y{%y&Wk%)-QSXfh@Ue?F!$dF8?b<0}c zz0w|H%0%gEv7=n-L~*TO7I`bZZo%`yTo|w4V=eT`83nfTX04IOJKRPJd4m4E6+0vI zAPT3SKUCo{y)jT=F&^6W zy`g|8ILxAk$05pSk^gVH6UW>n^XiqRYgdjP3d#T*K^kb^0)|M@CzO|CI|jm+cocso zuXjxKRC^6Pj#Kiwc6hgLuua~)=HfrHPb?eP{J3@sPA1;@G$a#IJ;k@-i22m0i4JuN z>0mY${H^s)@zhN+o(T5;$n({gI&i`Pe0m&*nk>p(U6;g*ZxjcywoctA4*M-|vOg}| zqA7?2!RLF>jtPW`!Tl>6AC{*U-7veAkJNh#BA-G@7HhNZJ7?ft*ukLJN$)`mYQ)Bi^jvdGbs320sS$vohQ zhJ;K4q{H5S@V9y%J&NA5sTsFHCdH!#c+Or$2L258To(rK3#<@)rPhxN#l^c9jAf^-XQT&0cunt6+>YaU=vZd8m>7#CQFjyB$#m~ zj7K2b!k9)hwCM_6q~fl>NM|%w&i&~3`58h1Ic)|fnLtWQW2Qb{7j*>Agc|cN_HtwB zb@+e)e#GymkmklU?_jtdnBpQAE2wwSKzRnSmquwQ^7kw=J@`pjzc}_wFA#;encM$%Yd=nG`f7#*e>Xh^ zhoSKJgdc!1tb{>7OEUDS*#veixH(mb`bqpk$?YQk`o;A6%a%VOtPY{{{#(UU& zoct5m->7Pq4i8T(-;jYhHPg`pf^8EZ20Vv8y~0`8^!FMqUQY|sr(MCTNM?u;Z)>! 
zZ6KNQoRgdY6Zx@u7{U6gi0*+qffU~PSha3q&yyI}BIQ+OSgEP5EB66~EEsdn<+vRg z>)nmwmfHjEPMcX^Hb*B9sXKhaeZYy~c7ETG551g!;aZRgk8NQ@Z{cgW2wYvh#Vp+V zjcY=ixO*=IV+J<~m_iDiq$;>{RL}YXqROW(5qzi2MKQcAGVmA3U0W$D)zt=SfOP!6 z3*?+mfaVY^{KiF)0$GO@%D~9ZFW{1m*?u12R`@|YY5WS1s3RIcj`_AgHd9xlYu97Q zKi;U)I|B}`K>$*+vGbW8ue;Z#ka?#ygGJn$D4((hC?_q)-8ROWFueHe!&hz*;T;CH zcsqy96e59%S z0KQQfB#jIOfMz;pM3kaZw)g%;s)I^Q`?c8tQBn)-NVX1%3BE%NLd>jEg+!0Xoaeqc zyXY@#{jFgmiIb*kbzN@&;Ahts1eM!Q172_NTL|Pa5{ELbENzwLK?a5{hny#!IyB3@ z6i)=Yw{R<#3fKh8nm0dYajV8DrZSv@sLBdZxF7)f!o3^Z{tu8Y`J)tY6*3rzmh&1% zEbFac;mpIA+f<(1+hI(?!^^nf`-EFTUNks>>d&n8zwmQHS1k#n1oz^ zB(;q#Cn3U-AxF`iIz$ZD*hn;th>rp#v~T;<`94^_?Z>;eZC{`S*uOwBZOZ>p%wGc_ zH$N5h4XALqt$!c#a(?i!dD!~8hbV4Vq4OR(!cji?QQX0ci-RaxrY zb9Rnm1bM4kT`uyyOC)L_hW7eBYbb?3Y)qS;HlC7=eO%j{sa*@$ubX^A&!$!6<7g$j+s7PVd0tBm8PRqj zWP&Q17h8{)cDgh4;CSKn%8e!i9>wcJV1I&@ndM${yY#A{w3?zM663d|9vvK#R}*RQ ze{U1SvLLolAxYwQY&i7;@JHugCLy3rCIINeyCw=HiJ+Mccs!<9F!4u~fg zIDD1tgR&V-&LfSZ=vsc)fs{u)w#)0hNfvuMd`{^ES2k8*^OG6JMFr2BUjSgBvcthw z1FP$(uAIG(;gONF$!Ec8HJ+IWYyeO+9=)PIENL_HnOVjJju+4AXwF)DRC$hsteHkb zpc0q%^(*|jz{LHO#E;&~L6#W-UI&6C>&+eB?@qS?ikd8M)~EQiuHvcNN8#5wTW+x{ zJ7@R)r1>0kQuLr@=tnSQYB|Zd|F|T@&9B;njWcgu0)AANF|71VYA&LsYN?ZA>bKX6 zChIJgr^#cx%FsmL8~aje8ut0+MFi<8GOJpUS?AiDuSa;i8v%bBCv$Kmr5RCf!gz{6 z{abK_yMD5tTIX*SFtEt891CM_CHz)1pz$937M|)iiJj%Po3K0P{vUVjz3zXkI%h3I z6!x=ADz~vvgO%LjvGS1dbnl$rW~A{s`@+Qcy3F-JcE*^-v)AE!qBu5uS&j|KzM_w0 z3_iOA{{nbnh;LYZM2_0vjq-IhPkj(kWtiohRN02FAu#ULcBn>cZthL7D{k*!y{B z&XpChrO36|l0A*C#$2dTk|YRpit;Y>5P6~jTN~T|Z_Ijtexh_BrjV7tP5kpf=E7%0 z_h>+iUk4`V#1QQCNWr-sb)%0R0V+50y=3B}-2qsm4Y@%%D3DCd%tz{^(=^3wxEcUN*5E)$w5G9gnnWAh=iP(!Ggl&zU_Ls_5`30wfc4T)haqc zxJM>B3v}3AeeLPIQgrO#{!r|COmd~r3AXGAlH7XzB4F&A*~&dHdzkHN={8WwYuib1 zzT1|>Gkz}Z!gRAiVa5d)WXgXyYU`WK7A|_WJ^#V=P`LS}Mo7CTL8bR*s(BNZAd=_z zXDF=^PK?w=uvkM8-0noksf8rzGh|YO>LHol=;y_b9)dBnaaco9(xGggJ`TO&fc;D7 zSo-aws2TU?q4gZaUF3%Xr0FP4gMpr(F)I`{BW1JlRP|_149BdCHr_-z(oRVk$JC*yrj(Ch7^p77u5TNO5bQU~mYbGLR?0L?O`+2dVeohFZp;jn7lElt 
zdoFkAg|W`pVZCZMOaB{b*tAK$_#su*qChy}_E&n=g92{fJ_2b*h@mQ2T45Z#0{gg3 z3w?{S)b2i2o*E&@t1&xjL*_WVnf+N1sM25P9kg;+lD}dXtGhL{`Fw+$B=}|}jW%x} zWx#ROMw@6)*sC)0LuUL?M>6TgHT-s$hKF>&po_D2vXbSp6?cYckgx*#vO<3$Rw6NrshKNFaa~T8tSV$uGCRAmEZV`0T2=9?;G2q=R zf~+D{zTlFUgTxMi#qay4VcVy7H)um{(!rzqi-zC!vTb;i z_vac#HW%MLx2N_>2L`GIycq^_?3J>yhvEyJO*z9&+5%V&1MKU$v4DH zyfyQfUXN=Yh`q1Bz&6@Ny5cy-YqDc;u{k!``1W(MeXpDNLi4DtiO=4&@U=IXTGF_P zm@3KRNMeMzQWL!-mMnty_3ww$dy(1BquR;;5RL2im><=$wsg5LTc(z1;jahDBrj#I5b+jF*ruVJmH4s6SfXQ*eaw{ ze|@-z%9c(bjNyVmf^n*k_W(1YJXf8~il>{;n1aG^&)^28CY9D!#-y-strKz^EXi|m z*{q++$gf$bk)1eUh_b7qD7;>&Wvc?dy>;Zbv%OM z^7vv=>-@z6eo=>(%bP@gTATr!>Y3l#T{PB|ok9WE?B@2#m3ypmW52&sfUL5Eodhd9 zHGY58p0tAy)RF-DY`JS(`Q|^=3DAoQ9fBbR3~3sgY(8Nz2Pv6PfvUY5{d+KioBWYU zwf_8EuhTg)#2^2LCl7IO7US6Ri`M9Ij7<#9!NwL8a`rP&?G}c#xw+vizu;TXFsS9B z3gd#CeDuHvn)tN~v)`-G6A_L-^rDw15p6LI(KVE6w;evD=Vj<-MH$(cf}&35SNOH( z{DQCI^ZTmuFrSN0be;wv%TKX@2}TlG`QT3aH3VLlc4UjOg(oA4D>cSVKkN~>R?n8o z;dOd9gvD2 z|6$`ww%nbKm5Bo9Gj3XVOG2;kus zNf8c>h zLtHWlT<{s1Xn}Qh38T*0#TX(f%Tvedtl00IGBTy6& zY5R zpW6QqClvZhpf($0W6oBFK2M@0PJy+?2Vcv_yn*ydLwAiKI~356n7Z+R0Vm!5F%k+i z6!NaQZG-yfI%%%&>_mUTMuAJ?P1bMQeRiw2bD-)oLAQn#kg74|=eGQZ`WIA%Qw$~b zue{6kw#EC7{*DBRWCBz;{OPhx1SH9nSgJk<6vzufK=5bEKy6&Sbk?e92d`MU9}K;C zX!#mlT_m!~JhbWD)_hqwL0pubp}*OthP(;Vt7z`LsS4L#?g`O&OLl~7sQn7)`pT{O zcaziUsr{5%qSc|pCqyutb*)}ltbc%n`OZ_@#~Rx;Zl#^MM4ua0%q+pbI3ss8XRnFQ z`u0k#TAP68(*PAe^?gyyuJLLt^)c+?H^?S1awchyJslSFaK83nM>bu*D)YVz%&|s4 zdvjnpX;!i+gN7hvM!$JAtd2z zn0h+%d9?|Vp13}e8LrB`%v`ed8qB04+OW97kXkMN*{AkGNm-~@UF;#};uBpdY7%e+ zRs8I7$}N7kdqkt=W ziBR>O{Di7gWGh7?=bt=XF@e?NS;^5NJ;%mepl!^3qxRnSw|Zh zLeNEbgv&A9R~0FWivpgyaO~dJ@MXT}>{IKW=Wk>BF!#o2nGeVoRXX0b2nZCxI2fd! 
zJ%@afFwgY&HrG=!>F9YqN!Wu*v zoyW-YChW8ewo0UaLp$F5{xwBf1S)-8!_S3zAler+(Yh!JNWgJA4fWjfJg??-yGC~1 z&8C|(s`?tZMOQ8ZJp$Co2c{2Ty>X9>rPxNX{W;R)9VMfAy_LKMTNDUYuWN`eFB$dF zzQx_%OUFPawEx+7S@6Xmu2#za-`U};$P4=}rF;E>de9w96@>D~Hz+4+LO0P6#x9Hx zuDTE242YJ|-!i@Jr*G;O9wlcHX8vV_8hAis>46$Ihxi=mM=^B6Gv|h$p?+O@i6$X& zVj1t6t>Yj&<{8)@kay`N5xCUcyAxAGTl?HX)`?9wgMc3Wt)<>xUB4Ys0ozj7KJArJ zf(mLM-Gzmbh*BXl{@Q3~#kPx9oqbO52$|PxC0$QdnGl?dXpoyo9S$)^>mKek*&=PJ z|Lp!!^NS+-?_F1Z+i~`8+NF+TL(lJ+TFw@j{!Vnu`}8@-t-6jgqvSbG`}d4(LU!Bk z-2HR+w&>f#ibtqwe*WXk28qKDG0O?Pj*iRgq17qf0;~|x)RtEFn#75K8n#^!x6_@k zK5i3J=!h@EuX($Sq7ICIMn8OE+F0cI_iI4RpM$E&D~X@$*{fgQbY6L`n@LUP4lY*k zc9#M*3+Ca-V{Q(M(KlVK+H1pa8muu~Itv46?R`V^()%GujT2P+XZA3CQm){8F+y7@ zesKA?H(4ZBFJ9iQulTulv4$c3Bqy)bK>p+r$wK|C5O-^9YuQuLI--3OzO?KtdeES6 zx2i1W7Og9LmsQ|Y#_>jy-)!HfJz`1!pabTg*jCL}4l(j;OsNu=%B`dfQ=M!{qH*zP zeg*ddzb|j^k3D}U*V~026#Ra+X$P;B%(`sqieATQr8l3Bs?}KIHH|7YLj9&kp<5`8 ziFnfRVvL`EG?p~lY_1IX(cY1{mgY6QW*lB7lYy*B;(F}o_i!QpNT&H;Hr&g?W zy|4Jz91K?Yq>^@#36T;7xzhmf&I|c-_Np5ly^Jq+_7G2Bo(`v zekEJNaG~RQk(_0=@#;-{ZN4c{U0O+3Hd-gc5wIu>A@Io&u-(1;-AL93ieph#<7W#_ zieO4C2EXq3?s4ZG|Hj%$2(h4Kr3BLUMXGiW58LW^S>xBn6>1e%5%ekT=!~eQF=69V z(Xx0eE1wt#SOjlEl9^O&s=Xcaolg z&VBC+OYx4=^f81~X#|;9IGE3UD&f{k{iNJp-90lHZ3DDTBdEiN^HS17yxijI!*`JH z_H9EDBgH=u8Z{oh9e!B1g{wO$F=qg!5^Y6R4A{w>@I=06$0Jq1Q=f5R!l@LA+v-3! zO|+$AA=^E{4GP3jV0l?p0gFmBZ_BdVU!*WOZfj?Qobek>1_}ASWHZ*{zCejEr|y=j`)|_gTu$KLb}fm z%R4n<HsK5AK zwFy>Neh?tLDuzMw)1hqH#}ORuP|&q72&xk+BJrd#V!g`bjYAbt6SYVkvJAj=&B#WE zZug&r4&~~K{#Ga=q5(7+oAe~= zD|853Uai~B7BQTJr=h;ww~bprG#p@vrX-PaRdCvLgLl*Jn7PXP23<^q0hI2_$YTqI zC_>wl)GE8dRMnNTo7M57eC!;cm%+XR;CTh}r%n2RzZ*VP;WTGgVthxz2hzzB<_YX2 zox;HrOzYGen){2w-j`-eE~sDOH&UL<-JKpXw=8BpNR_xCgWVPc=SsFe*u0AP>~MaUPQ@O!?%u7De1&{L7ucaq zs)3i5Gq0C>3jyyc{NxQv#UT)1AO<>Q;n8|E)sh`g+}(ZI8Z}?|5IsBkP<1EE!65#! z&jFMYShPgbWeF7Y}d`bXE#l*yDQ4(=riq zetOT`6U@+Os>D0r%lH8J^!PGep`-lo-|V0Y88zvRn+5(jq3Q(rQ9!WyH#g>c{H3Sb8t;KVR4zQcyqN81!x$6n_MjztucX=r2b8-CUPq

    {dz1AH^Ce4RQISQBPX*hjRY##K;{uX}oM?4aYBi&2Wnb%QG znH;Verh0^&Cu#UgyUk9#m28+N7&27IHW;<`PwX%d#MA$>CNrHkMLL?0=w2K6u!CM3 zx?Vv)I4uOyZfzy2+dES){!FG<5=*$ofBdm8w;@<=702rP15eopxZ46e%>WyjD1 zUsV=X%ssn&ki$2&QkkJD(|0|ph>+{%@SBs}n-fgs^z4&3heJ0n96`BKimtGdLtHjn z_4K_yjVwFiJ+e#zE1w*Po%jWg%^I7S^QjwMQEo4V;3s5dpcVgdmTdSCGEDHp=yC-} z3rp($s)tI3z}-xAkZ8GtrR_)e$gW15kibJLdhrL=v@mg!Rb+k^&QrLDb+tSTYR^Fj?@nmM#YlnE8mQmQ5f2fNsT-$t0br;0a zdprX0i-}1)_>v=1)H!6XB}D^HfV3`{ULWo|9a@R@I6MGyEu*V-e{Jb~WFejKmxWr9 z(f`E-xk2bu@vX{ZFpm3yH}OzZnvLm+(2CjT%Y*o;kiTDNW%|vt!Z8jd=XjCv$_?5S z`OjfLblMct^4fw&Rl$M`@?JGwK<4qfl#Z7+p-7{8g}u$pG4gVXob3XA?i^0hEPU`O z2)Bn6#U=EUhp&g-0RxU|hH=L%teNhB9;GQ-&4Bzaqox*X5zuxN)2i=<`RPqZo%r|` z+|txGd~hY0{AOKSeiK&r`{w5io~BlWv}o`vhtfHP2Cminsd_uK4wO$yBvv|pSRXVS z_I{V^%byc;a*kt?FPta3AR_2jr|KQ)19Q!pnYj9hLm$-EF*WToN9tah(1d`n*{KX9 zP&M!M69`pMWgX)uc)fNzLBi#x47{eEo%vn&{f-d={Ys#U{cDm;J4X@?m>VfQs|;D? zC8N!?ZoVgQTuXP+ZH5aYzm29OLoh&S6m{A3A<|vDuL1NS?fi3IbDEv0(fx>~rd~bW zcQVU*Nm^O7cTw_T^_Ky!UZwXa!L-YmNa1fj&pQsJ4@BL#Oz_q?o_;c>;s`keHbIz0 zIsNsqKT!4k`k0|u>Ks8JnM#ESzB9x(J+|EL@FqPwHZQ7g0LV5lR&|_*Ye;kAEHKC` zn}7r?J@z#Bwcfwie`zR=Dw!?$wa^=XGjA^<8P&b_?t}t#p3@77{&9-c{xt7pMOV#^ z0Mw+@?OIeHRx6fR&PI}jGx)Rko?Tgr!X1_qUYPgLFKO>o9ltF<*!nIms>W|H!0;R^ zp+KN;RNi}MXb3GMpJRJk^kKLm2FQ!`J`wy`09?PY_o8Ab7C3tU-}$& z-Q9_7tW@{FU+r>f6Ncf9zjLHr6vqeOx{XNI`?^T?5gzF!CQ^P9uvEGJwW^_#M;$S( zg4dgs39t)=q4>4Oc`@8mDE7v})F`|!;}r=wkB=->VaE}n+OuH_JocSXYPtC2@=oB& zcTL4b6a)WaN$4)L@Z8|2&kWt@dMUuZ*{(;R82ZasS%db*F9D**PgM;SXJr!$pyvRx~OR(f!tdk;XLqn!cL}(Wcfdk=r7@Z{w}C z%i1Ogu1@E9>|B42cE7pyVHoNKt7PO*`)wvtXVA6!UJn0ztx6!`u1G@jE-fu38RyAc zOzgpl!pS0u_TK#4v%qix3KU*k=tKLHwg``)^K!9ywAgIf+-msxI@CPerqL;9VLbG_ z(g%MW?<)H?#45Gzj-ta~qbz(9nTf`qdAv^d@cZZYJBIjP zAeDKk=Cnu25}RuQj<9FQOB za3DX)PkGX_M{m&~u-Jc4O->V14f!>Fq*^>Fm@Z5m>VGT~3K+>+5G05Mf* zn(X;{_w&RHzw>wPw`iUIQ>0eDF%7?phJt136=?P5Yk%M zMu47*yInlh=cLH|g^UYl;FU;AOIDR8cVG+*XRj)v@`@zL-GpGjzdglXm@X`)V*Fg# z$|8>JG?kEHgHkXr;VUb#vOn#qgWc$!{-ZWyxQT^KIToCt5`9 
zaEe%k=tDqzP_h<*j2#s*@=EXOny(JNTcrFnBzu**1b|@#Cr?e21QylquCs9Ox#m`< zP#RBW|DkTYVcANa*^R1I5W`^c53eXk-qH=k`$Xy8UuOL4XZUn=nJkn6IHBMrbCU3TUr=jXj@6%A)fkVXfM?K(x?B z1ih0#!!J+=W-Ku&0PKz?x!<%h!)JxT3x`r}uz<^68EhO@ujG6AvS#&K}YuH zW~hrs#~z*=-$jgpY2D%k;mKZ0W_cPT@SY=DVP+ZTl|OpQg% zPDy9(u!_;TPxfxBo3lntTz>jq6Q=cUiPXQ*X5-wvRN#sz8IG2Ira6DjT8`mR$u;|_ zWiRur?HaqX)g6nhF8=D2pZ-wM-5l>xdS-W5uL$}4I{{V^y4Lj?Pg(#>R{kRnT!Up2 zH5sFp1Xzw>2fE}0i;mmF&it2&&(mEfMGqT*k%t5+H0%ig?dB~2b0qYA6}1$92zo)V z?`?kP9o-XIrF?x4V5%0$qAPYf9 zR<52%nD=#ku<5-VX1bFjA!mbCflS}pn@?zAw%%0t2it}bHzTDFkXPJWh;&Fd`re-f z0W8A6%h>tCe|`-`)oc6y9VyLMm5is7wq)FP9s{NC?^(X+|jrrV-{Ils6ZNy3| zA=>xWKS*|yM=?<6)YkRpZ%!j|*CK+BkV%$@esRiyLtD9N$#!ZTU{5!WjTVKB&gs5C z8hlZ^z#MQR0HpehumAukaHTYKsEdvPsK=Z4`g1-b_Epr0`fEXI_7vG=mB}|7xk=VO zF)iz~A^t$a%`Xl9dWHSB+Jd^>ejxrh!8umm*tM49D;}uL!sMIRR%ChDY5L=Htv8+K zG;Y3&dmQxH!=kfO;nlMsBdp;~M&H3|M4g=CzdCG`XI$hynlnUk$y$yaKU+~z(O2zi zSN7q=FeF8Y-_zLsE(9AYRpNCrO|{EV5+MdiJI;28O!{~eW}>OPkX$5o!(m>Ck&oVE zN=JSY!ae#Lfld{!F9R1;)0^4%=wUauO#^pqePL+YVel1Ip|TZLaFvr=DbN}brtdIB zB**@HC5+%KqPUi}qN|&}CLtVpCtyy~KYvOepEZZ=Dg-#|h#Y~5)9b{neNmqcr&Ej8 zllDCnR>4kPwGqeA%ch=9fzs$RV)IaH7z-(wVH?qE3ZGA0m@-(Xx)!0D9t|X}9J9<< zUlxvOb@${+E2lZ~%-a093pM*9sxRRFgs8bW+cKERmVJB*jR?W#KoT@GI5SQV`#H>O zQ@Vv%yghzz$+?a&w2NiRZrsyzPTxw^$O0@B!c zF#4B8Fol|UvwyP~P@;)<(dv2&MMvvHj*|hvC({h{KtC9edX(lnq{(SZWQK&m`NWt#fd1h@#2#Qab8(n_q z@6(+4nHW$N`bj@z_2Syq`(tdL!)I{1@gAmzLOXIPOPsv;;55AvUF!@BvI6f1ZOtwi zw76d1Hhs)O1zI7?bVSy_N)zT^?jB-;Nh9ogVcO???O#m}_aG1-d>U%e)mXstX}Mnj z?{u#ILT6r>xR|0!$&NyHFYV3{yHrbe9+-93!aH)=4}fZ3a*nf0S%vKPj?dqLcn>9c zSsj~`3h+O9o-?Lup9vOs+dK%ts^s&dcp&C!7`FQGW5lS(kT&Fx?v*6_ljOgBAC1K7 zi%hnanu8D2XM8w zgOU0oVd3nFNfwNy?!sjLkEHSu!ZE9)r8hU5Vs3tTd@xlX>TYy4NFGGJEAjg~QW(}) znOyyYw*DT`H}PuUMAdj$kaXZXQvRt}UnP{!KRScwqx^X-OtEMTB=$@(#p{2co+bp}5-3FF{kx$4Mj$ys zmcpUzme>4KI-paL&?^oHzC#c88~b-4XIpEkAG2|iEQMJG@fYbDwBmMM+tp82cfQ;g zQ~Or8y+Ldx6o?hyYfJDT3OuC>?<6tND|f^-&sSb8x~?gdQc*FvxYDmYa4`RgSti_bH{OajCK| zpAK!_CfCj?UdpCpCs^(qrscWa9BGC^8CR9<)b0l&mmI^8o 
z#BY4@{J5=HYkiH3J-l7RlWqWpTVc$?5E}dRhQ&Qk_A27LbmWHBY>@Dh2lm&K%t0TJ z@gm}rl5X6@^Zy?c1+M#6QtkTeeQg2H_x=z0Dm?IT{J@9lS#PsFOj7q6oy z?Ob=_*DO1Z+HtyH^l}9d{g|8|yLFy<32Mffj(9smBK7FDR607b{xq2O>Rz!aTyYWc zdGSxl==(h*yow~bVV4JolQhf@6^)I);UZqxcF}9FjGZE=G^EY5(Xe62dhb0hxcQ-b z&H{WGqW8B5K8Q7DKP1Qlj^=o#2zcch@VN!7?@&5Mtu-Dt*@G@?f3Rkw7|R~cbY2@g-Wzc0CEXfKFScNuBX3He$@>4RU9s<1QE{v7(;ttpi;x!MK6PY zEt-%+4j%3lkm#fH!ulmko~Y09MP35yDp*GeGn+m8D`yIIO{L4lxlI*#Vx9jquME8D z(3%uaA)4_D-aj3<{D^h4EyPb!ss68TW1Z5Q>+8I=v)^2c2D_jxqCjmqE#Hbc+D||S zwGEzZ3z~WZvDxQpRtq>g)vx}wkmaEySLEN9Ir7K;hPV5rC6)e*r#_9=T})uAK@|P| zxBJw~#k*~6ST><=?<(IMY6_NCA9oQpkE+ox5xTD)GWsmOx-$$)SM4QPdE171PR-%b zm2;ILJXIE?F?*ijU^4^`!_I{kX`dsu1E7>vS2{np?%$4j%(!~D&5;0=qOsy^JBvMH zoy0wIitN$}D#d`sXIOXP;BF9gD7L$oP@kx}}-IRl7eB z-A6#~nalR1aRb-5)H+pb;clc!Q@o*rw(o5Wc13~ggW-qKsnSb6Uffi$@o-^ZnG$M z>d=5FitH=dGk|}n;2zuym_nZ;8I0A{Rh@|>@7C`83R+IO-%!8=OKtgeWVQ3mN$o6) zJRz$*Q-50VTnXB|)YY6ZCXEgFQCXqy^PXLjSFzty9!dyJCWd;K=XP?W-7}sRiiLCz zNEQCUgJHpk_xvH3)OPtQ_03YF{0BLymQHF!L7H) zI8{|u1E2~?l0xlD3kJl_70bgx5bWIc()g4jy=?SzTfA)6)LblpG3jR;)ACG-f{f}I zwD)+k`4yj?c3$SyfxelpvdYy)AQaquJ2^MkR39#QRv)Ah9OmYD)Ks*&k%JqpCFv|% z6xm4=@;|thU&qVz=?cKXfKhed+3w5(H$(9ynEZcZ!1N9P-qWTYzU@GAdM|{xJl=$9 z;BHA(-z<3C`pFd*wGU!*Y#3D&s&fwkyD0NMC$CT}Re5o|rN3lSLf^D;C8zd_+aK|IgU8kJe=orM%bqg0`YnC!9 z2`U-A*t)m(Io07k<1L8_9yUd`LEGZuZ>ZGTfXqv@T|b3g^ssH&zpVOd^2Gl7<4r=1 z)pNA6lL#Dv+@NLZN~Ut8^6ia^DI*;g;($<)F3+3znux7>*S9ZZR|xexwxS&Hqva6c z3imqORiTbV-}{R-KmTdlo^fM41> zq)z&Hgngm7Rjq%ndgEnCO36ZYWy>HecFX$s-NYdEkg;Cn)Q}c4u59BR1jQwQ#v4;$ zdk5Z|hs-{q-yNeC3%()(mEJ15~Ut<38=@hd&5@O5{$B&UL%_G37OCD{*yZfD8T>RPe>sXaKSq%(f zUiv?++kN#3+OaoQwv!-t=!xIp#6QoEqcn=#biBU&W}k^RPol=4g$);yx4c29v;dw# zzYhkEwu^{)`=`zX`a&MZ4nTWjEFx&uG64erP{V!O80df?JoqeQS?L&g1oa4vbZjHg zfor~a-k)W8(gt0NW)0$%>?@z!yI%Hhi$Xf^yTQdXVi0Ll^P_AZOyPC@mq7z27ax0G zIVEN`yfm7uo6#{_>o=P~&v2pl$z$>>8I9qcnFhp77XMS_*n!h7y$KnzpQB?8D_2RX z(TJ@;!rGfz3XW!f&chh<3BKwWrsOAn0}3U{m_}!z5Tf1Z8ONSJ35uEbS7HZi^MeK--(5&@ 
z8+Yx&7G&iMAg=rpBaZ~@Y-8QZwqAe6M$jzDA-4b!w{+Ln5F`*iDQ;vo+YsBWnAoi) zb?%2E<;TbhyBdbi8mu5amuq~7$Xb?5Ptmr37}oV`Fnmfokr8#Eaob)^axZdZ^Y>U+G+u*itPuyP@FD27zb56bVsrPNRF8frAg#XLK^sRuciZ z&@=ECQxw6Tn7jp^@@PzGETDgu3Z^>2F{{k>;Dm?q5v9=q+RkEUyHd}wjYRNL8S`Yh zJ5UNU3&Q^4BLvH+Z59=@S`3q(bxqnAtP^7MdvDb*ZwEK-dQ|GrN>g@tee>Y0P5p+p zWKw9FMN3wJO3;%hsG9ZOtlWe?*^I#6uc-!=Ux2FchX;JDk>2~|;$hE0~3H7Vl%du*&Cv}57rrJVAuZayS!Gs8>GPpC)2?u55;dk{Dd0-`N&RQr2IDDC^&pu5cK^L^t1)y$bl6oR9^YVY-n-W zv34wC(1BiY8slJ?SnX(N))w{P99Cwb|5U86>|XRPS4%U~ukFrDg(8nrCM;d# zQjO)RcV&*O$5hKq&CsXfg^2zcnY@m;p@lG!^zTuqxE`SGbKsT$?~t>cKLSso;$Va~ z6agDet^R@n=I}apGqzq9NUzL1+xMU~x(7*PW>akFpT@4h5Qq$epgx5k3RwI6INhhN zuSM8)aWnt>#wAu2b|Z+V7Wv1+P0!HGpD}J!uui{Jz<9(PvQ%$KO;_MbK7ivoeTms~ z_!0{BuG@)@kocFRV`LT{pd;Zlnd3i=G)Nhqp?V@y;^o;6A#;3>ZXq!~gO~`6m&w0! zJW5H&3|u<`3DoCkan3E~*A8--+sww-c-tZ3q=1Ym(KIgsB$BjQinPQqXlbu0+rTPQ zoTxSJ_!)r!QE~9Zo1qntk;U)=DO78A&Vnc!@lMxWR8a*W&qhouA^ujnlCv)&4;Q*9~fxi_lrmrJ$-gs#S*!>RyBBkWQf zni+5aB@<-jQG@uE}p;t%`t)qRTsR9%=-|HVSm1M4j=eM~RqoB)?-INgobgk~T@fGoJaMvAnJ*v8k z)#~Iom{6;K{p>i5^Co0kyB&Re7IeHc)#OwFekp+3VL-9CDA7!`xwX$;r`LCl7l)o} zqTI4N-Y$gmAUOQP)!{?+J)3%7`oPJ|I~p&cio+=gFiH=aci-t{eatG}Dh5^t zaUP^~=C&DjCLSjUT=hBNRN+iFumx#yXL}3;XR=E^$TZO_T1V${=muGx=6jC*w)dmZ zo}GK1yqx(nt?<#dd$M(ZbB&&p4SUQf`}q00-DS^@ZJ$&*b>!672kA$o_Q)J7yf1Qe z=8wWxH^iovY|df_e%%w@@2hOxlwH9HRBa2P=XuxqrWpGA`zBW~lZJPl^RldW<@XO} zn=o(pR9yo-bzrhbi?cX5h*3)Oq^zYq+8QVdbv2^E!?^5Q5-pde&JEakzF#Y7zMLdX zXhnrC;|(p8h!76k$&!LdW8p(kaxVAyt_D|r5;Bz}IrsHu&jzrZ6K`eqnOmzPi*I+c znM)8JepMlVE_3+znv9BWocX^mtC6jo`m?>8`_M1AeN#&`1J20J(2qyhUmH>D;@n2r zTOg)hP$68oW*h3N{;*|8@E0(Q#%*KEVu`Fsimfp-TTTAoC`eSyL}X6CnIUl_;O$H{ z5w`HY@vpQfT2Rh#stl`O#eToXJ3|yx{?FyGR4q@ z*ErI)^AC8`V>Yr;VnZoSjA+aMZvYr-y3y7QYZH>D?tBgHG1v&XS z8?GgY5gLWcHG<_u#PZvBO2c|HfpA_Gk2M)K38DSgHk`3a)S9?Cd@Jw2aBmaa)g>RP zU@D!LVy1fIXUZr%Ll|qRni1IkSGmW_4SU~DIbz%Fj#oUZaCkI9GUYCa$o9*}{1}wu zz}tnNYkp8DLR{+?)Ky83Y`S%ITA%md@nUAYrqVg31^SS>Wv7TlI@gWQ>r$+IJq?C_ z(u0v0QOWD`niOJAQ%+;jKa}CDomLX*wE4Se-O327j@{#@~hAvCTHv4z|Yc{Q`KVuov@OCs( 
z?$jIy&eGnW!B5#w98Qp7{i!vkmYVNistDFLc!Y)0Ifq1Kzr8E^vEMGK4Nk7n#7t9_ z-#4ogNhTh-2G;2iWmuO--3JO!kJW|`JC+ssIm-2-H>El!%edPhF4=nh97+$We|y;6 zS!;C;t=mdu6L;I1ywqQdENOEaWBUF|pZtIGpR2ewEz%uIlQ;3$v~wE{?JE14{%*N| z*EV>7(rr^cnzOxO29QaoOC=p`O?YjCzK|EaTbhHvuDYdGFWd`?xFKxmND734FkHiB z3|S;qN|QyT3FoA~+z9gRii^)OjRd%{>gV%gt4E^#M-;6FkR^kq25CDp=F%!qu45vSOyv z1vTk`GYKNV9YquR6kIt_b_B(e)fSpEu_(t7h#|X+aQ5__9T}>W*P6RH>@-Bq`2 zDVJ4WcB~LQtSE5gpo!bVT69)&bp;P@yilg6#H$t>4lC8Jk3{4(%obG4?(y&OUYlm&5)8#kr%~ zQMiN4Uue%LvP9b)cLj%W>CbXtfRpL?y(W1NOrdOKeu{;lQFZYDPN1s|DSOCtRuE3V zk%wivOZs?DZNYeK+@N_&?u8RbhjDxAB_(Qawiv-q(;JOQM3!;7C797i3!^C~%&gMKAr#eO`nrW>UsDNf|1cN*R|s+EPrmwEo*p zqdX2JTnA8fUk>>X-1q_xx9T7)qlKbL#2#X!h(FkRzLhd!nBHpV^bCm+aJI^i-b2YS z9C8#)hfR|et}Uxk_Js3jpL^9WhMT?obt?G-_g8dGu zX6v=I$4N!7l(3i)>-ME?){8Lj{69)**nL0*WgKJ9fD%Lmq`x1W&u+E3`4j>kZZ|ud zV`HXTQxW>8yx$aSwBApPJNt15u$0OBtS33sJDb^rU9Z5Cv&qAf{p8Nez@!jb3+V53nI?+Cn&YOJQM3l4nOAK@D8C%gb{0+|`$6!c zvr9&lTNDCuP`5exQgX5ABUBo_^!8>7txsg!s*)`MNg%|0fQ4t?N%g+UXwtwLhV*e#6N>>R{+NlUm}gnFeI zoD(?rSA2uqsqZT_&fUNuk$dE~AxeR^JCG~j8+Nh*p$r`DFRf|blzyoC481&5$`-BU z%X?|5IR!30=a&VbZI$<+G3u_v$nNr~5^ZclEE(_iJ8bGR=5p*!l@FkdrE}>)tsAr88%jnK1v)JanK=Hx_kO6pYrb z7!|atl1JN7CbP-k*6exck=arN`ZVBN2tu@5JOlu^|1I`os4j?P1PvyiBDpV-!_q zwpL5a%HiG1E>VfP>WlOxf-Q~q8CAawv}0^&HYb@>R|eiH330p50R$!k5kO44J?XT zS>SEPpU!h}AbP3yz8si%;}QqjfIM4kdsUIrH+i4dnUl&Uja!o<)B7$b$Yj`B^*Xx} z8C=vmw6!ni;dtAN10AGlFbZiKluGW?-VO{y3Xy2veFB6YS9X;sJKy#H`i3&ZD7ya> znoxSBq(DpGSApw4#}Oy>zbTlPdMR{8wYK6~^I{Rjo5(GHY614CyTdeqXrSyGXNff# zs|)Stf36ws-9j71o@kWQ6dTunMKv$byToPR4^!N$c^LU&$(X?OY$Wl8hPooZ}Ob zeWfApiUCsPGoS1WDmG|>-OaN->WdrGBt%?`4Otl_#k+($jLTM*u1`}Y<`nTEuJMC~ zxdqVC_zw;GtX<&Lc~uiq8mPw@0Aeu8TiRweY-_E02~5dXNo;9tA?eckrF7m5Zmxe> zIqItNru&zT9-5}+mZ=V9r`ap_LDhTLv?q6H--V(g?d26SE8h>7gYv(#?wLfMV<}6k zed!tHk`QaF+!z}avz7Djm={Y8UPx&O?X*{`MFg`TGJhhpfRRJoWottI)hvc~MJg|I z!cj_r6Xp7!Cy|NE`@kh*x1LyFV_jVyc#}b;Z5n$Bo&gq+Yldu)k&mjcRu^dzlp&fK zLehS;+-DOeo5q8c6;40*sOym@ym6_0UsG}e>+%*?Xq7pR##mWIx%@F%Pb**c)qMMY 
zi_%0afAi_b`}v0yCz2Mw$H&KKaVOg!RJrZo;J_@ONMThxyO&!GmnA8NNkR1THUXuk zUzPLvl)|x&k$2u7&siN+-xxK6yzlIw2l_>Y@H2yi`1qAK-fgPP*P4#zn8n3KQg&aZ z|C~#!Dsy06bHW@WD=X{R($71 z+l|oB4ZE|$OY6yJ)z#Haux3q5Nwkx|m6gq6_x_Nhad&5}mtM0Kq95q}S3Qv>whP#} zgNO=9t6PFe5}J>pNor19z2$*r^$*J_C@}A9t1Ud;yg4Z$K0Dyu7BW6FGgF|p_JteU z-)?UD30aCV=+i^>ufIor-*{G(`zl^*$6?jcLv7!`&=#(Z=H{}e7e-LK_z@n(Kqh+b zPJ?o5N@^+=amKFO3ayRo!UjyLh);o!HJ}6Da9n8P+e5>CrUk{5f)!MHuOwsSM&vTx z{O4!bqkDN>)&D)5t%1P5cD~JKhPehrgCv=t39bK+uJ4X(^6dJL^--!;skH?a0b2); z6#+$=0$Qs)D1r(ktPl_dWg{c95-f_3MWBK*WQiyvfdplPAt)+EMnnu7A~FJj1Q0?X zA>?=NfZF%{yua&@HtIdD`x<9_&-a|`#(C{!b%;W(Mr8YkzoiLkAmb4=fYK?hLR0e` z&Wc}smXHs`A>Nl5>)SUd4*V6PyI6Y67$%XbW>>y%sEl;eifCzoXKn%k2~FcisenEK zix^trN9jyoRY5D~n&tAW(ku#CKfOi%1$@jOXSu!15{&&}|J%p9st3w5cYxfsEF)JJ zbd^qSmR=J+`hWCq^Td5lJ4xPZf>9qMy$Y4TQa{${aavu*0srsdEBe+mr%A3>)Gw-= z>^}9)bhlI-O(CDUxovm@rY5Ybj7=93FD@pq9mhBCL~I@fCZ9=`^NL^a0HA80P3RIL;JCwf zL5cBk6&xU1{y5of>c$_0@vMi;N$6e=o&tMPvWJkNX zXGv5T%5c+M7w4zwlUX+D`smn##n!h+vupM!2T#j7hsC&wA?lUMn$u&VXA={Kou%CBL~?Z~ zb3HC7bcjuz_6E((W$F9FN=QPgP}US^^O6vgu%>H{!r2m4s)z-n^EduDr`25%a82}| znieZi9r;SOVzUzRS!dBT#7XhdfkUEZ@6{!S&h%nGwcyp=iQQhMM!CbJb&(sMT*G3+ z42h9@`%f{S%oo|O5xCD(N!-arWV+|5Q9F*B4j5bT6|*jSg+j_ipXQ}MM<<6kn~*Cp z2Y(%!b*%^m=kl&m!_a-)%#^%JqvS#Wl_O@ZMDGHp9@RP`OD+R2vro@ycEpWAQu!m` zV_;bowjGE#xKslj|GNRs?TOUU4(C{|aEzL9xUq8fF;RR)BoLO-I$mXzpC9>l>>b5j zFw^O-_|0v<(VU+gdwb3rdn)VwW@W5glErY)81=l@81A$u>!r7$Ubq}LT?7!#PUCLLW5^C2z`o=L$|BN`NHr1d? 
zuSBotv3Umzt>|Gnw+eOhX~->hOdjva(;a!ul^v|=Lo2& zk;*{F<}bSQpSp=2vSY#OFsNwhib*WE4Bn+R;+&v7&Sx#03mp?q!J62~c z-zSmHSbQ@;l_con-?ym6l7$P$G5F3LDv9&RB9==L=mvcFeXK5s9hzNHF;C)&qjSmz zI()|}Jk8j&kJw|zxo+*eu}Vo`E0|BMNP}0Z}#KCa?Et5a0h?3ZsUQ;vKF1m{e`s#Qu_K z${p-@w?aYfWCSy#0DBNdP6@LcM3D{Frk7kvK!kaZ9=>uqtx6E4{Cl_S=E8T8Ybe@=~)16li_ocY`U-`5XNB1EfJtZ;KB@5Fz z&p66t_r!Y}phMBTEM^r+|B%pNWBB^<8BMR&QG7)$2$mZu@`BWg4P%i>zG%nr3QhNbMxOe`;b$>oWKaS zX->cA{LCc&Z3|9dz8|u6lhAllx`X(g)WHhRn-&2W(@ZI~5|B5TB^hK_Iwe=2lFUNL z0_E2|VRAfymAvYH{BQi}=r==)PlP>B-ywbM!|M0Xv(f6g6c0-!Q73?LZ`tXRXPM4OH&H&Pkk@{cy#N61*29nW=)zZ=TY#d$Gqk9uNZOXlWW&OjJqess0MsbkMA)ND zb0(;~vj%x*I~kch)L;jG71OzWJGiK*v2MCeIlZ*^X^XaM7k4M zRuz1dugHAtoSu=FZbzf*g{}7kf842CPijT4Ql7}(w2597JROdBRbB`%mcHM()tc_NG*q39n?a_8}htpB)xb z=b;}gMPgN9&R#2GR-iO)(+isO2~W|jBo0-)4cc1Ujvw+sVVjK!E0a}RjQu`pSo`~}VslV}eE14)tt(@g5Y`9pOQ8!>^O z>*lf?_(7Rg7&wM2`>B>pC;+f@sF2fe2FO;nXb(by^Di7q^M*0X9XPCd+peP!0F#GQ4hn>~EL;$v4s;P~9T@r`M8 zqi#92cx6pL!f!0}eU=w&%)ieuVa8%IHX~5F>QTgUR$!XSr6Yi$R-z(c8bxG0px7q- z@e{|_#TLv`ybhwW--eI}AT|)SyV0<$(v@G0Y&4WiKQ+EBjA6b zlvUvIJIkI%N|DPzzNO&m{`#<+-B9C2YXh;KP#)Tyr(4 zdS(=B#~vPrdR=8QSVC+SODx>MY8|U~s<1&hZrY6TAE^Y3mOmkpLY}*IUzBOW_SX8DBYJ{KSia&VrdC2 z7S?jMFpV(Xq?HlUa+B6Xhu|bXSB$ZhfU1T&?-kg23zg*J$E=fe=DEx97}Qzy{el{f z40)QQZH2Q&gY-hRz1c+SRy~}q0BNhzx9VyCf8Hv!L4G%~7f*09mVyX{&jEXUtXEQb z{#HeZS`RT_SkF<%K$Nj{)CpoO-Ab=}{N$7*7Kr#lY5j>u8=(P8j|7}q+VWio>w7Ean2E~$W!IN8&TxiFOhbx4OoCz}fa}dh$+Yn$neEm3HIp{k%{VY9jd0-Js56ZFj zPPa|N(auYqOjPKr5P9AN4*TR~&lcu>2pu$D4@)op26UW56-oAVl5RnzQ~wl5DjZIH zo}YAa7ft??xNZT)Pee24DAL-6*F zM^XnZy%BA6F3lcr%J+r7$jF!9^U8H_pk)OMu6NS&C|>@5D4@38GRlpiHs>2Yd)c#w zDL@-J7+jQMnuW#6WS)&dRppgi6H#B+WJEH2 zcL%E?Oy)dcu0tb0TI(@HxQ*4@^TU`{(GKrJMW4H2*qWO(-_O)N)##bmOft)lNiJk; zA{Fe7+Yo-)zfgFogm^0%037whhTbn037NsC`Kd!l5~t&u3^)I&@pIK>V3r>b;4FtP zK!I2NADRqXilKV@AHI5ppTl$~JA1z}+ z6@K}NG+w3gdi|10bF|`;2to~4hL`1xUw48->U5HuixUf_(foK*x)Djwy2(m8Nkb%N zI?E3eG>wSRYjkZ2x!Y3Svi-~A+{INY;*KnLLyfluZwjHqnND<*w(Y48!#pb*IoOHc 
z5KUQ+eRE@-Km|jzHMp=!69(mZuXT^6oo5~p2Dbm0{mN8ZeD2U_qDnabf?ZYyqji?p z&Lyd@SN!Aow)RU;sxmJ7Z>x0e(D`og5ogC~mc+ZJ-)`YGSUWX4iT2oYVGxmN1>oB7 ztU-Yu@t)rbjj&lOT>FSY^eg$9eX}E+Jmw}6&SnU)4pNxZeLcIyF|!5H&Bk+esfkON zipO`Nt=JIykW2S&8pNs!JNK#S6%#fB8)GtyX-8&}d4(etS)@d(*n)H`Y~@RW+Oy69 zk+y9LJd%|1XNZ@7_tA53_{O$vdNsc$6_{<&``6g(=FF8EN%1!~puXc&W+FOAp_!`Q zvs<^SJCeC zIe&nlHqv`(sAfMiMJM+aor83+jt=k#lF7r8g#z}o4~?Q!e$%nlf83j?q6Q6(s?X2M z8%~zyu*htBf0lORcZ!!#aK3h$cqoS@9#O~n+DHV-5|-UW5QAFfG-6tFVPY`{?=YXA zV>Hh~&|-TB1CNpKzZ)K4mU3Pz*ZbLQchc;#75~mxfw1Q4={Qc+%?a0A+`aXSAJWe!Q60gp8c})#EmO;Xl3kq-Xm~cWJv88N%0RzvYys8 zhLs>Uy>mnWVi#I?^m)Og{t&nyd-}i%`9ogI}gp`Oeq6=G4Nx#txeX7E;;wxo`2+RmMuZdHLxUDIe8s!|n zxnF>1){$8Sef0a+kjDTpQk#BUS=|9Zjc_UwmJU}4%NM>zVuOJeD;UwdbF?fWjw#9C zpyoj@q(TgDQU?2?)rLD-1~-j+;jm{FtsM?-zAaIA;)5ud)>qhfFswa6ErARXxr=-ZQy~F4 z>(J3crzm*lg(t#{Lt4$GocclH%yw7_S~0KcPDMq14;#aN7Nbt;v!RvhdA3a3kuwUE z&<5nnM`wK*@vq0no-Tror_rJQ4x|k`9JkfqRX8LbV zj=U1C{3U_#=`r?JmB7NyUJXlGP4XbVSV1qJpi4x&S+R=e7cmGaPM20n)j8Z zdgU*t%gB%&0+PItD#40R^IzP%1uk@9CM~PdJFERupMle?Q$-u1k`O;FO8!0uaUdWr z2GoQ%L%{oT#Z<`|;C=Zlc_pI%3xb1HulGf$>{qz4JZTAp0-Rsb3Je$vK7Nx6k<#)o zgPI%T0J7FJ$J|mtjVV}kd@-J-@YCBcHjE1x4Mz;$y<62Io7?9gcIRI|WU?%P(IIkS z;|u2zy!xCcw%BuO3RL^E(w6xe_P)@=7=xn9;QoQwl9`H^`XnQ=nL|N1OoDk7ZW$Cu zeVFV$BYxr3oQ`ce_^Zf<^Wv`ovM2K%;-mf8P505iTM#XmY*>+EreSnff;RAU;^mOs zF~4kwg|M1GenUjfQ`@p^yK3+Q)rD7=rX{`!(jK+&muUS3m&isifPO-lZ;tN{gHW-X&wELcK@v(&MNQk`j(qD6F^_ zyBbv!jPS~(&C&B&c5pC1WYaLO%!io6iQ6??YKd#j{UC$daIyV5n{JJ@#P;P*vd<`4 ziCy^DIFFgD_HG1F%Q0=<^d|lqR~yX$5)UoJYSYrDX>}7Md3MV_2|LzZhG(=TC+}G3 z2@5IRmfCG(R{|B3_t^>dQ3r=VJf{`OuxA;~iakbEcvkvFD*cc)-Y3w@%gaS_jb<0) zPb{)?VJD`n1fiK#=aEu506-o3<$mUtb!D1h2V;8+fKSs8<|2*B%J)A4o{I9a005h@ z!A_26UO;g;&ocyS$X>ECsSk0blFS%(<(?TDg76b6Ijkz-kkmIUY&OUV_R4A>s-_4# zmP|}k`1(uW3G3f~x@v^Y=8T6J41P-9G2UGP#hqg+2Q4tl*nqZPrO;PXb;@{0-Jn@} zS^)0@zng7lI5lYCnlbfPLn6>q%|k~%ruq3zYYNH-wCw{v`-!OL`k0HC zk`ZO`Xn@ap0#rK9zTnlhEQwJ%KPO_;a~CU}bYEaQ-xWGK;rO^Vz{QG29|NPyuw~)z zAN*nqcy?j#z{^4EI%upn&r`Ax4_>b*BAq59oq?vaCA#|(p(hf|s@v*1aPAUV@!JX< 
zS|$T-OP&g&T1uERBOGnaJb)vPZ@z5$)KoPxAv@SMVBiNoS?{@zcUwu~O-j>yr~kmA z60}B5#@8|Jw#vQUXQhe>I3D23PyGq$Kw(>-9WCHRBNd%NBIr_COGCi(fFT&rdIYfa z!dRM@lrNVrFGI#)Sm)C0fB|Pfo)d{1o^AL6<(M7Rj{twrYCNhk0Sgpz;(+w*X*GOo(yR09%eFv$EH~skc5?-pUKLAjyg7ZQWq}=usH7I`B5NtA+0UhSD`7k`!5Jd zAiJceWQ>F>sLmF0fN8N8(=P~e;tyI~1w*g1s>C@yUTw37{k+-y|z*_9J(4WNf(|3yNq(&kovXM~vVU?YY= z1lvj*txPSYKpeuQ)r#3l7GdZB@~mNIk?PEnrtSX?pKj1;2hQ(byCp=1ELuK)iGmJ) zT@lb&;_06L4eG-n!g33*8Z%(}k^IQSb7H4}tI4rJR89bMPeUkRt8H6Std`$?28cgr z`Hh^RO)mJ@n03rd#GvJsjbA`DR^He}@U-B!K>(~u<#4k(CWGXFg>C#5jL6GhMedMEgk{1f}Fl3EfYBVeas>f#NpZxsSW+X z!Jv_1lw+@kFE5K})Q3mY?m*#>-ZWfYBrM>pV``%lsmMSuo^V95ezOa}YBC_D0I%p^ zFw7z_hA%VovZl4oHNne(R+zovb_HU#IX+W#rf_ixn=*CWi{!B1RzO{R)Iava1Vy=^ z4@bK&1f@$fPW!bkbsE;53H#pqj5Bk7@annWU-^;D_w6rxEaWUGXrUVXjzd<4BC}=J zaCLQ;9oEu-Ahm^5hYm5FNs=7lz#b{s4TIWT9zBduvTtis;FN@ozknS=HODA#OCx&n zKXRt=17Zn=Uw#h=?=}7%GfSODg)=`1BNbw*D=RPGsxa0HJwE`lf^rm@f1>-`zvihK z8hAp$3I1Kq+v}Yc3DU*?5HJYl+A6a83qP>mao&91 z{t1`ueHrEJM^~Uq)K5l!<^pUcW*n3*!h``Ieoht)JIn2dBGK3ou3N z^-_QiSw=~vcntk*VGaA~u3@vhJ`2DS@)qfcYxi9+4AB)r?SugvW2e&SMfSqFsO7o= zy@t9v;`yTkwec-2>%1;jVHK>`3c9 z+8FBQf@#tOl~AaMZ|MP_twPv&s1dBCwbE33-#Z?0bc;g95B?cl~JCEd;6@#gR59KM)s=(;1vTp!h#lxU?+ReJmGRi+?U z$)4uC{=kwdP~p`qf4yC3M)4WN)Dr;tX+0v7*s*N`ak(7|HFqR7YJwnK1;#>wV*Bm7y-v0f%UU#aiiDnOUnhk6c+8rQ^Lo(=|qtSd({ zH%$sc^JgP3{)H=9CKZNnDj_zOC@u-)!a2jyHaEi7qhM-J_F-W^dh-jI%9dqTbUOq8 zKPzB%BAdR*BUIfeheM2R#+4$0v$M6fv0u0N>{eco>M^3b4V=78T@h?W02x8W}$WbCFiX} zzj_STNu-?ow&mQ(C4!ao08ZCM$(ZW@OVf31aAQLMMy4$Mp)p)0I);T&On3P{>1XrJiu8)$$%7H7 z8mtb!=lB=1yKt65Nyl<2eFvOohnNGTGPJ}ez*)L#5<{Ot@W3214S^35p&AfSu{+dT zwpNs-@Xm^-IjqyVVE>~$ejNDIp;TKMXWi>4N^)O2?RXK`gOtlR9fwVWdv`WWM>FwA zhyF@515NQDBRLRO!;3qwSnbGdR^m9~H!1QSH z4A<%$mGD>5KfRq+ntO1$&hhm^hpfSLaf)}JJ{KW#`q+Q%iSQ6c>=Qt!ZDJ&RvS89F zvuNDe-pa}r)Q9rawn@=DW-F4ilz*RiSWnH8sN1w1RrB;28Q6xckjF`eBuRv7U-v5; zE(*6P1-;{|(7F=RGxT5EgJ$15ayC|1lCMJOjn7ex*y8u5&4L^Bta8|RosMrr%Qxt{ z7nfGl1Z&PJy_LLNQWM|`2iWo7NS)!U*i(tKJw;B}UdWJkG@O({#T!%#?E^d^dsDL& 
zp?&B70U;whgC_`GsKNupw@QTbCW2TwIwr?Tc_|G)GP%fB6AU#a!kx!+p_1c#^p#8!bvm3i0Q+xm63>nch@*smt4YOmoG$ z9{H%DQcOm=22_bth)}XW0XBdmH>|MofzxxgYYt>2SckifSs0bHRw7l=*>JM9`3@0S6LAU00mj+X`QJPZT3$ltJt~urPX!h^O z#|BmypuN}!SnzL2$4unNPX&dB7=k&FCqG7>utp+Vzfc|x zR*u$FUxD$!S_l_FGy4icIO-Z&8IZfMG|1EUju{3M)d>kkrWj&rb2`Jkmcn9o)I`x; zV@u<$a&1i+h=_Dt6?zy4{vRG6fG|7G@TuJoLt zoi9$fa+3dd%X}R$b&9)e!wU5(^AW4|W+lyIu*dG>a*AK65jd`NBP1j!(`z}@~S-@&gR8{RE5XKgFq+KD!Hpw?N zHJ#S~4aQY#-aBw`GSeBSXB0e`6x*QKGw(j{latCBYO%WMT_v87I2((_MB|&@`dUF? z;NEeYjcxZd@x1$i*GbJ?L9q^^y@|-bO((}Xuv`r}0|#QLLsOK+m(kS;Jjd?Q z98-N5>}}Mq{kF6BxLT}tIC?wM;TjL5+{;qSzfsFn2Ai|WxQV#5vb?t`EL;32)N8sBxj~kZXXSSr%w~LY~xdn^!m_Ype{7Yb(lo7wklFA$46d` z)a-68hh=6di$^Fyl2=^u%f}Y$O8T}LhUSWT*oIiYrbDeU(nVEswQj4S{Lp)Sd%I^+ zd0aNPc*%bLXX(eT`Qpu@^0mE{q!KuSQwtVFL3&T&k$&OO@s*A%Lym$3G(JRs*%MDP zQcfef@{OhW&{`yCe5i9f1*=#-b$>&TPJzx?>nwK`pSnsiS*6q^R`SWX3{+r;#S97W z6q_}_$=t0jIY(_j&G(t00245J9E0YE8~2e_h;=)1QZI?;Xt>JUEaOO{H$lSK1zcJH zL(#&ln6NNL6qdEdoMG}qs9ZKtY@Ds=jJ?X*h2f`b%hD4VAunmcZ!|;q5nITFAamml zvfh&P#U1kgZ3evqmSH0kM%`6?Gj~Y9`DD-VS>mKUzz;TDd`{-D%=KYAZYvY_R+O`6 zF0TTOlp`lu8M_2HIv)Qj-^|c-G_6dptnwm2PZ;7MJ;@!8HWKS88pUQEs-MT}NIqP* z(SHRauF>MCVc(MsTjx7oD6GD8iA$S$M&d?XL*1+PPh2_TworRsP2D%3n;P~pb@5lF z^rAGb{mg82-XrP4bCDjVDE;~2Lcl9Pn?xJfkisj9CB<3+F25T!C+7G8s1~cXA3wn{ z{7k0-`N`)DANS7KkGO_o?H@&1uLS1O=QKzwf=hoU1$dJdt!lr?Xh=xvwxvDkIw#J6 z7?ly?pi1qSK~Rw&t|Ha_30d(54mm&F(Z6*@c`((n&4!=yXK z<#~>gc(nPjz^PYqR5m3LXtuA_?YsMEA%I~ay$*l8ukY&OZDfF~GfD+RAueiahSH#- zwcHn>i8_Wsv#h{aKI}BxPO3vEV%Snxv~@jm5j?!*=17v07kDTZ9JBcQ>BnI$W=h{Y zoZ6H6LBt9kd)}1MDVi!Gt5wd7IF9YHi`}Qfa5bE+X!Y(1sV!BA^)PxmIZMk?Wj`Sq zhPBEtY^;-%RYM1dEhq~MAzu1->XN#PFFRG}q8uZ++x$GI#1PN{q44yJVS(CQnJ27m zv3=9V*_bsg07vjbNmk{<$^e_u&;0|tw8*=$M3cmz2K>d;Fv6K%eR-pO!?4-7SkUJ` zkU+yIr;xkEZ>d#*q8LSRraD9CJ{?PuC30qp7>3htH!1N4#}cNkt=hvr)g%CT_8h~K zJkd_Y_hFQD?$mX>cg%DU{5|BdB7950>m~39#`h;_rmnOPA9yyo=?*ZA`cAVfw2`$U zrJ1w=A&!1-A9fz_8kkRO`j4;qx`5eHfsX&fR|e~D%B-`~&Il~#+~%^k^#ut>jT1Mq ziwkm;J1gglvvRz2#ea9^um;{ 
z9WBw|eveXFLBvrg5T&oyJLtsgCgGIRaA#=lQiA&O!^U2eM@u#HchObq%c>y%-=7}@ zHGJy1(%2*B>`E`2>y%COe(TeKQ~AIKR&7aw_4vT(5N+4qN@QCy%LY zWk+J=5q$U%`TyMTtuPvOo@YRPZCsXy=(I2KnDF?zLrnKC`@0YJcix7WVJ|x~q<-g8 z`{327J>2zqII7gQ;w92;`wtf$)tD}eVcBvii-9l3qeyj@6J#Ys1AcDd-Mxp0yS5hmvM5zfB^3gah1jd042`Q-A=q!rp2I+%GiCa$MSJX;PeZLyj^yAN}} zJhJhw`EA(x6F!7aNIh~0dlpO#82J|@GAy^6;Nq(*g{|ZYH^N$*aQ%>gfZ|}QSr7fQ z+DPu10e3q*+&;Pg7O#mPAU-Ap1)T`$Vnd0m;=f_BMI$6;d8tBhNx6DI7 zo`wL;LpeD)GaI<-ZGKoE1bvQ?W0VYp<;W{)il?PP9T7T*(Z$Z^AVzG0vM^Kme&Bbg z>K^A4bx^biU@@fssOk$nt7r7vk!m%<14y*6f5~$Fhe~yn+m@GZ$)y+?I?Z-6Wi9Kr zdU^Tyz>eKUcdm;uaM#$p@D9Rnkt+q1r3z{ZNycF0XuGNYN|ZCL zRG0t&BLX_XXeZw84kz@S;HzXD?G63#+JZO9;Nu&rBLD;SpJ{8dD%;jmbmk3tZ3?MUaSEYoT7eUqy3u3=MgfvlEOlt-dS$9 zL>g{dDYIcb4Z>xwOFg?6y}BU#9IlbHK=91H)SE?^MWSwifI6MwQiJ=~;pXnC2e(8~ zGOj%ODd0oRwOA`gcXgOZ*%a1+Ug+GUv#he=LMgT?uvhdp^H`awKI3E;ohV^!SRFw{ zF4R5^7l!mrr_eY%om{WkM+mcZ>npo6t1vdNj2$iS~=xqV!9?i~<>1 zuj)XFN;63)l7&-)_SluGjRl2f&}RMgy?ThPrFO$*ik<%Ro(+A(v92&>BlqeX&qIuY zx`!D&@ua89r_JEa26_F0x?SeiSQ3V44`@k2U!`crrRu6+`}nAg%r?*OdvZS$ zMM@XG&rvFffn{u4_rcGtbfa$=Z+di?iodQ zPD3QC2HR!S-%igK7DP(CKMgQXEe}Y|Yb~#pLSF@DhW~IZv+VF_h@ozh{u2ScKXS}m zH?$U~Tp$$Ig>}0I@(zdDFvvk7{l1DZ(hc+Ro)Y(VPjgXM4z?(@j#N41N2hBT(KW6v zV2t|L$p%eEsE8NDYT|YW$$zD0;6cjYKL6Lg%0i%Zh;$!DBY6gwne<#Wavx67L7EE^ zJ}Lw&$dAvvyi{tVWBY5*BSv0Xh>Eyif9H#e_$nB~rw9~bD{vE@17K(&&S;sB#YjT# z`8~PT`&7{7RW5XHjqy$7O^4T+Auc+qL6AWzgDOB1$yXpc5Ar+u2$G4BsDL*r_C_Eg zXqx5-Pj#u?#s}1XOg9N+^)I|zQ^d%pkklAat94g&mu4^xCM5s8TvvX|&a6|5i@4yb ztbPtZtK_bVm3l6rEci5`O)7eQ9(rx{kh}wDa1hs!RGf!B`kt0F*_E2;A@8}Duh+wDP&cN=k*fJlEf;^JDjJSu8WTm zkH}iF%t~gf+}6EZX}@)RjmcrmE%@@l*}G4{_LGmAxapO;9P1kX#~hUS+5{Yh!ZyQe ziX;WW7x>kYOXo8cgCe!6kY;u+O8X)1*;xcQE-L?fGV-%a-1n7%fkN>H&+t3k z@BvJu5?{KNB9gC1aXSF_hgZ3V@Nc_W1e~bNSPB464#|1{PQNN$mxFgBC4HgBQPRS& zfWlq>$CToR$+C#CVO%F$0)9pWnMzhGFtk>*F*|}hcfh|qc=(dIbD77K?+X1iqV-4e zZ|3~93gy`K`rc->P7Y@*Iq8;}=39 zKG5YhA%&}9T5rDg{GAwBn~?&X?t=r6b6|^TbWknQdR@pbXl4!>Tcub{9Z8_JAH*sK 
z=s-U)7nhcXj-jii5mfm|KWpGA4Rira*AwkbK~{klsi@9g0kc%htJ+uS^pe$5E1cBqZLMR^=Pd8^q&VMK2367mazYKB;iVq&4@jqKo_eN3 z7$GJ&bvkLmhG0r zvyghUL~_W#rvO~5lKk;3qr^i&Sq^Zi<3P}B z;3!mv-ajh6HKsMR_EUP%gb#PT3XU303W%DB@RD*a;M!x-g0Rx-Q28?QTB;MB2awbe zd5|r+eI=l|VHo1yXY-HFn}H-zGN(SPvt4T;NV;rlidFmm zS{#33p>pT>1H%BWG5$NVNwI*}8}eqc+v{)%r>GrU;VfEgP2l_WD0o*0i^XNVbCM!F zyWf|^Ppe_=s85*Kw)>^--=jNsJ^GhQ=IQQz*M~SOiVJ_VeKU3U+{w&tPtiz?lnuj z^8`z-lZ~aQDu;SZJ~!rJ+nXIs4g_AkFk~8(UFs=jY7jAZU?ln=W#(M-+9JUDN1Q`{J{)kqPZ6xNzdbvS$ zFx8*=Z`Qd_#3Nlr$w`X^Nq@o^3mxOeEcQ%RWIgZU?3_4xAZc)ii9^{O0wjg2X(SBp zGBJZ=SL(-qGU1s&#XmVFUA1AJkD9ycO#=O)ByGL zIDVU(hM?H;WJuiL?Cy~218_coeNUhsyy1b}?cJTs5A?*i&|m6nP5M}OenC^r77frl84~P=r;njC8wElv`(@~YRnsKRi zG$r+=6pO+^rv6`xgP)(4aR>PYO&IxA?LNq8+F3ytUb+wv7e#7MNoh3HuAevYGKW*} z@&dm>9(tqlP(2wr7P@Y^l)kDbf5Rgj3&|XLMZ)x0P(Li$PDlT-5-xfDQojYxRDN1j zKMpq^sK2w*eW_emaOdy8-Y{r78M2zJDY-wW3Fn$G;ckwjdHC%#=-C6-`zlEu#z!h8 z3%9TSQ!4SKJ%76+aM%2Aads(D-IUJ%QG#~AJPXWB*D35#XD}F;NG5k9k(sdM*s1Dx1&(2NHoco)92KF zk&fb9<$rYX$AKq1Q{qEOBXAk&NZm8NdY`z&ow}AwrO+Q#HSX-h76rO(Jc>fCEzTUC zryyrqK?`%v9g+U{@}IDe@g{Iu%)JXCOQ&0e^NRM?j~1m%N9QzG%-C_`k@VuPD=vX_ zE-v}uG1SbrLd+bgFE5q; zjiIL1(%(GT%TzL@|59K1%O5DzkB-OEKI2#qzao=gZ$0xYSf(|jOS*J(@vP4UQ@h)5GLYghE2B}KI3?w=i9PMo*k!Le$eDf?;&kA zmN;Ae$EJYTjQRSEhPP*i>i*4|`BdiO5lVc13(gT0;eS;8+2o42q-<~%<{4#VGq(R> zG~BO(F-)M+XNRAOxP}R__r*^Z#*8Fw5osS#CkOVeb_4s+`M8HQQP8qbf(lU6+ujN+xz8CMcNAI`x&0&L7I$r$uj@S#q z!Id=!dk3C}1P+hkXu2j>M&4%Lj){>=%?-JIctLy5y^|(!#>!y_jbpa$(b-rs8y$0z zHDZknLWE$vvE6&}iG6JBkgzlqAL6wr9Nt+!IC$a2i4$whz+h7Ss{&E!!fQ*?<-q{5 z=H6a@tnW=%p?~^Fc3mwa^JnvKGKUx5W+C6O+#bko+os$ww@5o)H?mdcwLo6++I)~| z{e`%nj?0Xm&<`B{x884Ic z)bvNiEX-UHBosIx(Y6uSB?}`w2El}nDtH9{`BS_3x@M+oT__L|XeAG@l z#{c^V)WdfNTNQ)t;G`yf6D|J{?uwoDBB{u#Lue6irS%fwQxye}V^g-cIFT=Y;%d*I zfB2&2H_;n977jqoIjz)@k{O|oPdLY^iV)PzPS`DW3_iDr8}!Q%&tS}q1{#kYcm3vm z)sRtjwWZ{l-l6&lImJI-d^b3CL#JpRp0lUvzPFaPh^{T1+md>yHTU49z_!uVenkS0 zl7k~7U=;;pyn>a0D<;r`&RfVNY< z+mXk^oSYLx6j8Lblf&%dJWb#f%l3x|zctuOx1PFd{>WWHF{i*}7Dg+f*V-Qye!N(_ 
z-c{OR6xMBLy`>US>n@^laPRvBw;u(^NMZGw^IJ~B5ubFd|M%8AHBGX?TYVdOB~I{@ z;=|KryDHS`HKR8C)?eAb?n4n#gMRj?>E$+$iu#_K{E8DH8=AZPNY8tY#QY>OJ=8gU zY0Ej?=!u}!u0tL(lFYl#tQy_4*Yw%UZF{?Ha&e57vfek;oO^$UmrxWS_1H>Dr=U|{jrdLxz2yMr;S{oLd2)84fE!l~sLd59OI-bNT&hUGoWsVLh$ z9}iL$X>Toc>(h5CxD@a`e9bR!uNde{X=4bA<_2VD)fEiNolnR0@M{WGsMZxv3yQ7> z-tO^xG4dQs&WU`TK%7obQ39`O=N9BTu&64D>@*9RhLA{hNqxAwFRDgW!7W{SFc0z~ zOk~_MT{u%%O%)0zOo|r`>Q(;S(2{*?r%}oqSrKKA&bt4{-g^Z!wL}ZU5mb(H04pff zf`HOQ>D?|xklvdhp?3(qsGx_c0g+xsN+_ZCq6mmWB5LRmkrE6gv=AW#z6qeGT<`z- zzub99c6Mg=tXX?z&1$pO%7fS32t?meTVr$gzzcK?s@y&`ox_Rw$GVNN=1u`{XpY;D zfuj64yRJ!)Gfiz0dn77cA}wj6qo6NcxeOLvdtV70XWZ0XTDmY9Y|nTD{Fquek>A0md+xb0w3T3tbv?ZGmF_>4zU`8{yMJ^<~$?{oq!ww9hXjriXN~k3dg3 z28o%!TIYK)h_zo0%n9l0f;GR6vrbaqqlnFGBy1;01QfF_6aRs9JPBkd!Sld3OhE$H zeMB~;UYIR&NXDxSsxx#a0S9EpK?Al`~-QneUZl-P#m>s-Nx1>aA2@5cYR!U1#KA@RhO?kkb zjxgPdG3X=Cs^&D{+@*9+X`{K&@2gI`2bI6iSK1VN{*X;RN!hl?*fd#LWOkIBU~>^> zeSz>HKuMhUq~k@udl$~>#N;f&w!OI_$d!>eknj4%xizQQ0qkuz4fqh;xRffH1kYDA z@3IZZV7s8HYG{O?_502owi5P4XIJ#QcWIMD^P`QIl!WBT;XR%+y=s-Q(0i~;V)CNp zfg|gkDiWqrpMXT-*5}AnDEI=ZjB~89!tP^}KoQGVAd2?7+y;8TxNwha-PDbx39Wsh zO`zMMNIpp_Wx`RgX-*f{1h={$Q7)`pCeW>7(R4S}Ag#T5^Q@A*L1R;SXWCgMFW!OA zR!#$jGaX4?LJFwbmvA2;M69uCg=k&Fql42)OL&&S-7rgsHM(=@;7Ju{>;D+iVY{ zXy8S=*|pnn9VJwesk751au&CG5sG4z6eRe|gdBd?PmzMhXAhvS(n40seD8$?AtSvfVOYYQA%~Al40=&&P3>l$ZwbM@9IJ!YAHq2ZF35^FFrl810f0 z)g>+lYtqXl`!$;j_piD8T!U{JH#)75ZLeJjy%CFv#5!IiKg@4X;7!^|5TIBhnK1E4 z`=Cz}hw9Q2cggKZ`=ja!1Qf+kn&QxMi*$}weflX@bJm2|G_G9affafAacR6W7_mNL?>X%8@_U)Zp^-^(DoNe z_DX@ZzLYoIOlXm6@y>cgU%Hx`<3qXh*ol|gj1M=AAJ*z9iNDmeuDskGB)knj(mO2; zWGb6U>XMV`BV>zbv;N)fv6(wzZC#z*i9KD< zxP|Rq(cp+^_i)n0hW5dGNzYGHG%nn~ER)nVk~q<-)Jvtp#c>m(+z)LxyVY*dhb2AV z-<|s;&)_8LD0SUAX%AM2cg`UCWT+#Ph~4l2)aPcK9Qw}E0G_DMyXOCJfbdc?%)^m7 zFV{k~!=FZTuVJ|9rV?aMciP9QzSI=L$X}_!Qp9?PCJR+?jIfE_KTWa&+Lu=VghGcc!X%Ym0mX@?H-w#_g{(yk6no z>sdbV8J?0bF~Q@W4Br3=2{dXyAB}CO_NCO@8V@gLa_gO|3$5@;FUFr4nQA8hQILPevNZ;8+Fr 
z%ca`(S_kE&S9sz~>;`d1OF$iBhQb$_jHaCdQX>~qZ|zLaU^U6R0kzeg!)XF-KuvPhx{LkSZRm>CJ4HN}Sm!smQ^;X_*I_a%LZhy_Vu z*1=2%7Wnhev+_#0%~X~K_WObH4{MJqq}yGrKJy%VjjXx$V>%X|so9(EbtRsm#KHc> zQFST)4`F(hl^yt_X$gsLwMBAadPN^P)n%)TWuf33HpZKdl1v zTCn3B_YQYW^q1HdygL;!k*3|WiF^q{p0g{v^=(qfxe=(ckNfxU_j>VRk}d+ZO9niS zgPEhKP|%1?R4S{d%H=@f4stz66>D3@pU0|-C$6c1)nZ~xQLImQcAaN(D|C4Ej0`u1 zD_>D9Z#{=iogly?tC6^l0zdgdD=(u9p%UFqhv#i7G(kck}6< z=^|E{nKha|Yj2ZlPMZ(|^TE{>J*niBXWGp3oU~dCe-PitI58V$?a@&k^GvQV3HR+ zGLeY(pX&M0$DC*>V(f|0QJP%Fsk|>wBNk@AAh9G`J{vLg2oBYyP=0LUEjts4(03d+ zru6?T4|G&nv~ zN}LO0=TEk^wv2FZPMlC*`f)GKw~E`xfavQUJFuk_KnD7xC?2p(<$BDWysFOLS(TL@ znq`B;mM?ac9Bxlc!S|rg~qxN!q|0&QdqZXA?R38&9_>#scML?1oWnVuai5 zsplXzr9*S(Nh&@-2{x5Twc_qPU*1cVhE+`FD5zihlv_K3`weOR0MX0viCDq;HJl)d zVhJY1b+F!!#x*DS zK5D3$hNwJI;ALsQ98Udtrc)+@vGmbKbrvn;V?WF0WV+jOU$db{Tz*|uR_6?0{m&^mIb0%*OX>c(9NJ|vTz;mr zS+4|}J|U4jOFeNmVn^tS_=|QtQSzdxRs34No<}g}VkH20w|vyWb7(i8?u=KX+XI;n zX7KFgoCBe*AU2=fO@Cy=hGBiZQMVY;anbBfLCu=^qRS~P8`Ba)*aMK@o4;U_QIhKP z3unBl-v45P7w&(yGnSeW*Bt&hsbMrtoAC^%!uVa=kfOydoF56Ry4&W|pTqme;Kh@uq?uH?O;-z0g+B7}Rt>xR*49bB zzpW5ITR!y;wWXw&m+ypq%U zYNb|dEB~vZ&gGU?qrRv}f`OQB;6&++xO%rlFKL#&p7b&?F|Mh0@?8t6w7#$Vmo-Z< zM&^7cK518Ltl04n8^#U@;B#82lCD=+W$6)j4D-|bf$uw|*o02-cbA`mZuR$<&5;PE z&$)9I7B?0v5xRFOk3t>L&tA7$PJa3ep)Tu>vSo>^EZ2h%UvBU;sWldIl>Dwlm*z2; zcyxKd(#8BPpud#)NNM{anH>kIX`>yP=vVLC7j-I*uIidn{Y^lpX>AQJmoWCu5WU#k z$7Rq2T^k(pdf$><>%he&domOwtwuLEC2-P z+%%8gw*$G34t@%dX)R*7AASVT0Z;_OeFCL+WSoEY9(+TS(6WA+3ZJFv6yrcg&{$Bv&+z;-;lU-fn_6k!9V&^aZlZ3bC`9|3kBrX6YG*l%(0 zp;y`->PhuP{3cyL1%#tp5g<7gC@Z`>>;K=6AGt5v*#wLT4}ph{uZg3`SQYo>pF75G zQoMgP0M?7jDWAF5fa!0XR}w>4AK!8O{a@aQ`rK6ibU}0%FBY6%?qRPQ z|CZo+EyCAzUplHVn>9#I0;c1iNEiMs(s9qYW5u>L!8zY%lW;NIQpVS}?SmOXza%Fk&)O|1Ig5#16FDMO6h;3&V6b@ZGVmZD5c|CAQut2IXfvNqH4!@vLoDAo3v&0o*iy{P+q z8y=vfmQvY!v`vooVJC~z1{N2uyzEE01{?w$IfmRc;r{oR$3iAQgbfu84KxLGB=wXw z@{?4HWG{J+zYF(TRfZS&HB%nBSu1~r5lmbO*%Io&Bj9z&>(IPIG{L6YehTGcogP1R z{=a>5evV3R1BEy_4KT88OMewpS9uN8%EM#rZT7>Hsg*&An3uG*oVG+N_4{+*+5`U^fj!&iYiRl$gEI 
z!FSu#Y&(DAe^8MTfng@4#9QgWAGW=<9U0W&OMet-&S9>1Mz^KmKnFMYbl-D(W zOB8>&73AsQ^@5fX!ImM5sGXSHP_N6nT##m(8Z6z)&Fz} zeoAMoo+4|)-Kl^f51|l0)s=kzaDV&NDtMjfCf+hDhGr0?-tLfh9%UBfKYNoBt0%V@`} z>Lt^iHMSkE>=y{T7bA$22|#G+sf6LT^%$9tn{TL8 z%|^YE{9^FI4pol+Ei5B=4x1niDrPEjOQ zhd_beoRMQjirZ6;N8` z?3a@jb9X_eO49L?gsC@YKU#jsy+tChIsrpiVv2N)!z*K-ns{=uj{kj8J{ zxGRGe19lZUkn-Q|I}Aj|njqymWDV5(RPCh9R#T=Z+9~PbQM$)oT>ZXR;r`2`Y_Gx| zKh7QMEO#VDSNbtU7N;Z(G~(myU*QaRJ-V{#IAu>yCVm=Wy7R(|%iny?fAs_(1F8Z< z;q-m0_4gXswgvpe)$VtO#21;bn{e)Qg2z+q_n$kLOZ?Y**O*C>k+TMY(>?Ydl-w|{ zSmY}JznJf}G%V@+*)%-^ZVR!NQ?S2=;3ehA37efOm_s+pbX~s$pjd6bT`Gngl!3cBk`~->xv3_exC~ z<*Q$~KTwyz$9D&S*~nmMm$eG^{VhQ8L-uJUx0fcU4;Dq9SIV~rUVBuxcu2;@-_dlQ7v}mnUzR8ab5x8#9i)rM%{3XAQ^4fn^ODj z8?epaw?Gi1y2E1$6Xym;Wp+H{As_DkX04gK8y2{rZK*?Dpu`Vy`H;GHBte1Szuxbb zn0*n0^D;4~AP_Q#c4}ST+%iT1eVVG!g0$%W7W-;?x&*^7UZ3$bQdS@-cb$>vL{?S# z)Gnr^>Zksp1=$`L_y`u0LHPd20x>SklCH%QmNWch=bP*U=HH;ii>Gx;{9g~n zU`vti7IFr~{XPTp;E$0?ON+kaK?^H=!%lLRIwnmsS=SI8B{koQ{9r<}b@z(KQ?@GU ziz`P3K-Ptnrt3ok)PaM7r1>r6rcj|CFJIy(13H6QEiUpKV&D2Ohm0L=er&6xW@F>5 zq255+U@gLHwlFlbZfS`arQ`W=-tLx3Z3%O+@s>9P8JbcxJrjygk($^Zn0_zH=YP^W z5PVi%E8xDy(05GqN$Z-1I&V0wkieJYevcg+Z}{2qA>G?XJCDuf*L_>EXgGgtuC|Q7 z&gEM+O^XA&@rb*|5O2Zq6wp?JVs7x;VK_mB*Vlb9b5O~xb0!0{1%xgaRFO?)C?|F@ zz+?0Z2oR)J%xLt*t5C|L1P80I6K2s;E$ zWY7uqRMoVcqQcPPH0>a#CS_h#Yj$RYV^|I`gz^|I`ue`9 zw`tR96!=xAa+XccG-9OOcO(Slncok}EMMMugcw}qr`w^6>lW=Cl{W}*d*x08VSBxZ znPaX@)iMz!^KEy|U+(@}aH^XDT|nWnXyEIQ6qNJ$tE!F6yWN8A_rwRQNk6V&1*lu_ zoLxR8TB3VLKy10d2dMCW3vw->V!h*rBU4_n6ab_}++EufeZ!IKJD$9=3;(_oqgFPS zfPilN3Y894J*NXf=PMg6sOujT-QOXSGiu;_)YGTvcjI?->$kWQ;=#$dOEnutYGt&=c4hE zJrJXA4NNvmrHL-3gi5P|J<09Ofiu%`)jn^};!{!Khscc|m^ME_x7L`KQ6Slz!(0k%dtm?m;F*Vk zF}PLQ_g;Yn3*tWFU$9gPYB&GHg8kG4*kk`wT6&kOF@6X?_K#i>cngd@4kQrjzrbn4 zQJ**0jp4-sDp8eeuicjj`=ft?QQ_+rO!QdhULGiHvj_yt8f?_g{@ubi`n+7{`z#Hr zQ-WCSmS=<@_ZgS_4j!39Cwm?;r+D>l8RxlAr9Ld0lwk=VUs*;W+vHrg_7{#s+u=61 zs?}svM`;nuQ$Rhg`=0Uie;@|5qIabZ#Aap!Fq82IF!WPYm}gOWPGDn1i}sK>+4W*4z*V-6hNnL0+NU^HkEq!fFboU88IN1^eGW%yl;1 
z_00T(GS_2$!e~Hf-99g3Kp4N)!vExnwi^ZCnXX>n-1kE`-$}ObIyeRC@xjg+YgU2l zbieaIr^QY`GYiX@Nrs381ttF2VbWcgn-+kb4Kl`vX2 zS14LU#N}Ow&VM!l#7g9rF1=-yU=ll9lpnMJt$imrtF^n5jj1`7gLBz{xs~8QLlqeg+1|buW;?>p{yr)(Y z`S6D>O2{j{8eE9w)TUkd>P4Ab5}WSGp0&c=LGFGbGylb!m|ycHU!wTE5+z*0G){@a zFahl=sZwn||Jer;PX9wp4$kqUlPVipOVH$ydbKrszU#oho3^n3lLL=*n@VWnz?|1g zSe8E;IQ(*I^D0qYE8*zB+XPGXi~VB)wS;-jX7%Hbfjr>^dj$$aIObC)=R2OT zIahUvo7zc>Ta%WYBK+uo z-Zpa2zYy>4`k@>e+0<-@z~m^}sYeyYX;ORAYHg#D0^>LeJ=5tII(MT@dJf z9s8kx1x-C01Az>o#R{?uvPsS?dRQs*dYix=h`BwGY0S$^bw;eOV?P(bx-Qc5FAv9? ze#yayV$!>X4j%Pr#CKb+6Ab23HK(4@TXWgT*TO7>dO$9eoZW{iRG zFk;nt^k_Nkb}M=MX;|AgAzOX!`3=Bb1)r}q@a#Jr;GBAY+vMB69)W*(I6xg?D$^=3 zQ(EtPmxu(pRa`&FZoUEW6QOLqm(~}wKksQ_29j5c)ku}J>ve2is9ZwHIBP)2TL$Zs zkNz4QnNGK7U~zDU_Sxsf`FDrW-(#|s?02&u&QXRY-Bp0r16j$Aoo?2M0loHrb9k(68t9i{9WDp`75hd;gj0`8<9k6?)DTzcRx>!J@2KxFpA{Bb zBySe*YJF+%)F`8uHm|5af!`4RCZ>`^GSLjkvEU*VkdCUhzQ&BO7N3w<{E_&B^c zL)(2uj~m1syfZ>V$N633(+^U`naGiROe!L|YDM}i)>i)`Nc%9ru#ZtVS)*Npv6!a5Mx-`tqvi!P*bIx54;Tw5RHi4L0yaIR<$C{7ch%tBHF^79J z4u9l|Z;&gW`DaNxkDj)k;K6+ESV=dNv%Cp(qF^>PvWeEc>mDorv+O3Fj>3>jM<@KoJmKP!{ zqJYJ$uqlon5hG#zg!mWj=GM1-ZjRyF92WhgTww`ghbGNx8~r#xDHc6~iB=ZT;Cpl9 z3te2XWtv}Q8Jn*{-z-bQphoKGCzep*)iylWD>mRtN16XLCB(C^DNy)_>J)4shcXm3 zPiA#`gzaShcVjN!f4i>Aj28{;qr2e!&PlU z&ibbuGJT=MY!z8+ zwBcJJtP^D1=>0Z96&qCmjh-GdqU4^mHYgdv#AqvEkltVIn6F^_kdSYPf@TMA5Sy(F z6<{X#YlPL=7cK#?C=gj6r(m-oC#2qel>kZZITt0b=GzfQKE~v6o%k}$ zlJbEAr;t}SxjE;G=>vJ+g>5BAr4OhVnqJHz+`&BL6-A5A*+@E3_!&s+MH7UyRyGuL zq;l*8r5fbJYW{pTGB7^62n3DhLd+C@UA#qd-WQnG<021I!bh;8oaOUwMc- zF9p_SO15aKIZLi8D50IbIaGW!h>s4S^;)6JFA(YWH88Jp#XSpB&6cfD59ijkZmfO3 z@ZNsBsgVhoa?ER;Vc52z*CGt$xNQHO9pAjXR-@H7>4QO@X&+K(QDCGh^D9NUp|~>@ zeb39(%*0)F>udo%&qXu?+x&S0{3vi!GiUoz9;V=wTWK=v@@Zoh->j#gZQ}R7PnjTKKq@<(<^h*Q;!8^R zqh+;wY}J2EX{I&3aV1rj(I~`vxDbW;yGS{nC}QYwRC#<`QSZ!a{;sNz)BAw3Gohhc zbc?b@bbxU!4{rK)EeGBwVlVWU$@fg7-(SNe*o%$!dIa_P=dL>tSMd@BWSz%%*2Drl zRnz>`eVlryOrkau<*Wlst>=rngSO1&hVsTsIs}#*KHQXbmTH!=?s`$;stPfmS;t0= 
z!hQ@(cQk!?;)<60Dx!X3G2w%uNoFM28nGutRIR}biiTv^4xYR7skOa} zRC+(nPm}m5>rN1r>HIqJW4!aV}&(Tjt}?~n-9woX{%sY;VeQ?>ji z0yI^1Y~q{lRcYgQVprf(TQX3&p_nKiM;5{5VYcG+dYu@K%V`gwE!GR4Zf<(?uU)=~@VqxpfSJuzRcCsm z1y-XN^V2XtNFVL0A_{Qg!_G2zkr+eEUSUxA6K>ItZ}_rZ6N{R6u?a(8ZIEAA3Sy)< zZ!_CWwQP)9l`=vR2mcGqd@DjlNn$ zV%X7duw*lV;C#t^U(eRICgD6a=Nd^bxR)Nuj?a4Ob=X6ht?V9p{Kdg57%iwQ4jk{N z^^5f|^jwO>ZB(8_Q+4~XZbf)q43juudV^S*@BPTeN4rzQjXDTJqrcbRzjpe0WgYs- zDlN-rS--cIjJ5$dPW3!M*kA@&Gg%uc3Husr+a$FjRDtP$KTEZA&KPv52@g&7*ND?# zwAVj1?@q>E!A!^Y)1)NHo>TY{seEAZu* zW!7+1N#9=#9S#JMkWz632j(-_BU2<_yTbIE4Fg`*b?@ykneAmD6>Z*(uP>*NnHk!k@>-O+%{3U>WwU!;qsY0Z z0Mp;&;<|BjW#)3q%tTmdHt4_{A=xl?b1Fus#NI7}nnEISDf<38snBv(11*PiL1XoB zG2X*@Zxm({>+vHDZGPfex5OusF|{}ZyHR7o7_=PP?vBk_zuPLcpd>h`OH^Sf>GZ5D zd^^r6?*~i#^dnb~;9MgF9D_k$`~y02^V=x6R2Q_45j71GGHX%_v4>9Km4D#do$}4 zd=*tg_UEwqpAOJmJuWA{30ma9Y(bhqK!NX%lydEndXUk&4IB!u3jS#Ad7H?8Icetc zPz~J#=NFd@KjuKko6F#-EwZu|>sD;z`iDUeo&inkOF)+Dm7GUybJsdJ&IKYM9cn5A z&`+&v9zoQ-)(wSTHbUwom9{v*y^8fVht?wMa8vS&v=9I zi3da;Y8%6t>D^Md-H5V*f>iG4F7x8E1rk*&K8=+i zB-Ipt@F;sM1_UN#DB4IF*DHw8gX$6NY66K3$K^>0Vd-mkbQ1x@pu{iFTu#D8Gm!dP ze1|K2yDr5@HLs_Jxl2hn)V#QVtJUyTbslHG%O;Tl)2znN2XSDb+5^o|jQOUzCW!S` zDd-;k{A7t^owc|WcY#SB3_;vjaZBADRd#a2xHU`Rv;4ctXY{?Zc_mwO*Z#3FJ<$q(f8 zeAfS*^0os;B*DuG5#6{*d~?xPS2LR*Ba8E4=(T;A`wY#v=z{q#cQ4L<3Bwv?4V`RUPu+Z0>x+Bet9mPyT3aAl;aFG=8YYpC$++mEm&#;ex)d?sX zqbxRtUA=ruqCr6|4{(0*@aJS0SdW{-6VEYQ%|SBtW0dkZt~?N&uTr68&zIR4tBiYGG?vFOhZ$H5((jHG_t2je zk$SA7?oby&KVrXC^^BtLi3lU%Dr9pkUJC*U`te@6!X?10X9pCvkl>>4T{l_grHbop ziIY0fwb1Ki&Az3S%il^LYm;n<5`Dv&79u4%#@Ycc&&Q*d+QJ+N_BCqX3H81jS+sL` zP9rW&q#ls3GIHw)J|+o8RKKNz6BlTy(g-N?wZE1bhZZ$v(P;HN)K{Wb5?Eo;Q#HA< zxY-d!UPDqg^sg-zxT5U%X3K|h3*8{ftR=#%h!Z>AwxCwHX;17r&bw`Y?e~`-bQu{+ zW($ZJ5W!qS@QMv@NutgcDA-DtR6du>@NK^ov*t_uf`1N@fCzexZqCIrC_J=Wjahqi z6B`55CN{GjY%*@be|czv*&0;9N}4%K-rU47goeetyId1Vpwo+S;8T8U)91|hR{K`J=+*lZl!T>BRKN~i`#Q0x27z+DG@V8C|k0K zQvk6o#uVSrg2V3Rc-~i%@i3YpG#xIj+3lGs0*eW;2rvOW4GXtw6NOc4BUnsf(xJ 
zJQjHf#|N!giORJn3Lj;s?@!un^8Ipwv$?)b*E2P&E!){}q;xESRLbwYc^VUeAQuYp zIGhx=FJsY`&lvkLt;x)~D#-Jl|Lh=1Y}5Kv<56H@yZAnlfqX^g=1O`mGOy1ra-DoX z{~Jqsev|Hq`CCcE2+J@K984I<7WSnjfJ_YHw<^}#6qs2}XP~Ui@k3gTbz>UK3ZY$rzma$~kV&@v&R%}&=*s)eAs|U4!S1AJtTE`B@thH~dLL~&i@gIhW zpeQnVy`3RqlhXSZUN$gNG3G9bCynNS%yK1$lQlRUbzA-61tLS}tN5iaVWD%nM#mUH zT+7s+X-Y$-x4&oX;ed4K{*siq4?L30$kN8*^~VRIVVcAR5lXX3fc=BzfP1RgTL}B; z?`HO=K@7eau&wvF2;G-r5oPBOxN+_Ea;`)jQWfLQdhmmT&SM>6+YPKJRFBEtlO$w% zSFY4o<(g}VeWdrW32XRg1?X{0u4!A0$qSz@E(!KqTc``|BSxu}K{DB6#I3+N9TLuaqfjzymP7D#lg%hxWp@Mq# z)2)C7(G2q4chadUY;nuF%bUvxx@zF#n{Cz#>sYxnL+YL@;O~2iQ<+I2(Ob52iV;S` zLY-cXRqG{A7R)7Ty{83%<0-o1BqC4kG|Fw$S2fx!(Sosm`k#Wqbl<|}f$mbodJkp1 zXhO0_u2%}H*AW>&@Qm*EXh8HRly`Iq_r4;{-V8D;NwF+eTI;^q+=LOT0lX&{m-CAJ z+w@|$!c?%h1UaE*o^yxMYs-h-)63u2A~x;b>eDl{8R^=qU)vL7*}E`2Y22LoWF@bG z%xcyTO?e!3682@aG3?&e+{&`#Xpk$$X~mQ6af!<)KkF!MwZVwWMo=`+_c#o*W9{@8 zoDfoVizR-7gtba(3)SdKA(Yodf(VNt8Mjnq+1`*L?HV=CIW5jW&lH`6YdZ>JYy^Mz zGs`A@AC(^$coEs@_V^?eD5b)pwq)Ffv)Gac&|;gOSbwKZp7GMCK*!H~z6=*_g&j8* z7oAZSew`Q0T7htYs8ZKs(x0q}kh9&loKhC%3M7LDiDHqN){e;9UO(P+ARdL{yV_aa z8T~~T+r;@O+4Dbf&yH`dCJfAU1!lN+bS>wJMXybYSf*8>8dQ8X6V^;?+C_q_x)SK` z%qO+VfFrUikAUVBc{rtS8Vd`O?JzDxe@N<M48K^*~V2vtzcSY zFp*a!q2pGIaBxqHUNDQwiR{FY+mfYS2PT*v#JC;oInarvoxfnwz+HTB!;j0M=?2?= z_EwoKcY8n^&M`8UzT}kNiuZpu|5#iU%Q+rAvgyJP(ZwEth1Uz~0&P(jZ$QnImePQgr-f8y@SYY*U6-6n9v|p4q>St8|Xbf7QUVPe`GjFey3`yF(sxX>mtH?J|LIwJ)?3$q72o(&L%*xz zz+i~N9{WAm{qp&561vULxZKZh)cBhBpu^~(iuSQdy_VSr9V&QMfkf@NvmwIAWJg8% zuRNN!w%D_=i~J}XAs5*N#V$|F_vzu=$X~fTx+nO4@TBI77|#NMNg92UP8z8EdqRGw1x z`TP{6|2yl~TE(G5<|LYqBR{cwI6pKXBsK(n5KmAw5Bt1%Kg0QoB_{S^X_oS7?A}+$ zMSBU0r2R46Ca$?RO4lCC()vr>2!eMzwM;7xskD&jISFp&b{UQw=4%f+j{bV+vnjpT z2YYVWzWzpu5IyumH)HO?EIa(R=7+tTjj0Q@)ZWi09wa@vt-jovu-djrd;G@rf;oU) zHnf)hSZUw;bQH~Ca6~XBcnBful`YMbaMXs(*v3gCdHN%k`)fE&hmt+B%asgAh|9eY zOJZviqek&*W5wG$FZJc&3R;EmT5BDFJk{+xFKw7CZrr$5rPR~c8xq8YlZDDIIAu7h z?y;neY-3#flAWAdRR%Y#v=I5z$inqt<-)a%2|wEri0n*-^PWgP7g<}`nWI8s#DmNd 
zS{3xERjJA+)PK#Nmr3m`!`NFb=hVm-Hz*l5J@Y=V9&^!qJx=MjWji>VnpJW(cvFph zzNO(9MM?aYj3cks2FZNCOVQ^h%h%(7ZNF>k$8EIB75kanjBUI^?rB!KonNc1 zQQ@P-S2RQSLfwpp>q?Q5ev`(uJ z@j4|06B3oUB|o@Ve_wM8cnlTejdrww$8BczrD%SXYz#h1<5+oG9t(Q8bc~H@ntaPa{X;fE=FF?62{a8Vl1ewqe;nV$RjMN&NSV6z+SMIV ziS%GxZG_t>&@rB?doq>Zf|=L~fC*gW{)Ww^Wx>tX|B()PL6du{f?;3NzLe+gk-Bdp zCMEWi&{Eu@Pg4nq=AFji$&hFL0Tfm9GYcZ|k~&$H5EUA;mhhA+^O*y&Axx$BDu~v9 zR54!KYgDop*}leEZT{GrnQp)3U;eok%Rwv&TJ`y2MP6SWH)tJbZ8pB9WVV#`C&6%( zjbpPg^Rq=q8Lc)qaz9BfGv~i*YPqEEO338c5@#U~Q5d6^B{Z&sNm-Nbl2tBpfUHj& zabSAh;M>+}3@u~y57HeZdbHd!jq3vIm;AKY2R` zgfJ#K`pYwB^J>K*kHQwUlRSHbEtE9tW*W?rG(Yuk(@Bl}An36lJOD{1#*`+6CW@u# zHO>q#pFnNY%-8lqH(^L`yl?r%W%QzI>$l_bMc*Ug^r>=*;Q@`668x2)*!YVt(nOfh z$OvCL>CHqoxp3ypnjC#YNNRdinbL8bCWDN$qhtR-ZI({(ObFNB!Rofn`R<7C(DUYU zxs(el14XP&w<_vroGAnyK&PRXa_Ph6B*X2N-d6+pYN3ax; zlAwj%_2*1%OD*NSXMZ#59!SS9yNqS-mv)?X_GY`t>3!k{-D+i#Cn`p#l=L>b-!IGg zYu(9~px}Jas`Kxiu_=o_Zs86Fk(4L!zg^E{BPZn1O?dhlQXN`Dh zC_S~_ha|L(v|u$NceW1EF#BMxBaOwbd<#bfc&~7h@0Fehe$BP#X1d?>`@d+pXS?5m z^z}2+4=a&}-QapyN>iS#f6=+qL%_MX`X+`i%p9tl#m56^+{-ZSnp@X>vua*)Mpbl< z+U`X%G56P;9AmhLOdj95mpm8L8t(Ar+LaU{@}*$1c{1UHg+!-Gz=M5P8y-0>$sd#r z8Bvr7zkh&teR_rGJ1uRNz+aAOF?+=4e>kh-rq)dQr+kXw*rJGiW)3m?48!D0R9hbi zr$L-vKM+nje+u>|QmadhcTY*2!gKA-#}s)u@ zbLo-q7d1khJ7{sw2pPEf{w#ag@xq$8?PY#eKXvV~ZZEID;ux(%5f0trsGs%XjNtVe zzIVATx6YHp-aAdCeb$p2SgEC7+t=sAG%v^(=J9h zm(V=bc);r}pOyH+pYe+8;?k>4}?m@?-S z_Ei}Wt(#t4UdVIr`2-B!Zrxk&mj>(u&xs&Fx9QB&JI4=pN4CqG-SA2NQQP;%+@Xjk zj(gu5``bt6Xuo3nDMoRg2uYTFVf!P|t@RJx2-x&Dd1$tMSyKLm0u+E{d**4hFUa{4EZUNdk(Tl#Fs#l+~lpvO~b za7Qj3sz;k}MHmrSOAf6|KRjpc&k&*KeZZUb2XCb?{xfm5)9}$zGqu*$IIdhNbed>g z=z)$8T`jA0Y)U}_PPTOBDgvgbRz~*@jRzbtn%Uqd!fxKajz_b9BPqOJz_Lar_R_XI z3#t0(Yo9&%#?~)>>6iq3gfi>;otV0+Vfv)+(KvSxgd$Au@}C^L_kq+mXCLnC^hkg!e4 zCBc0uH0)skDCH~`W_9ER7tyMz1*5Mov`NDn^hkd3Ey1JmP?wHMBO2`MdsW4Ig<5-q zonMb8YlOu%A-nMU#YudhLGV5G@jU|ZLhn)i(77zfr~s~ptSAlLX>a!6RB|1o!qMChCyFd^fx}mzr{fkA|^La&<#JC6Ty7C z&~myzU{?BvlppUsT=4vxa7eS>(lB#JZ$*}BdvEtQj~K_LUaobEMH5^92P5^QcDEm4 
zs(Vb;2Vl(CkTtDO^^i5EwpoDh-&h^SW$_wQA+hx-t~d)ijWXZ&-~F)cl(53kkqMMn zdz4>8gdokMBX;q&hrg2h8BBZE&~Io&n7D_gNO#n*xlliYD+(U%c>ffG7D*ZNkY|nu z+Ue!2@9p9JaQtFAw;}Rjz!pjbR~^D-spJ_C#5x&mUE$5}E!KHngRh!rYf~3yiir12 zHL+(mSW$CsBVi~{G@Rf^u2gI;*LUe5_(Br#pha|Gp zxZWRJ+<=2W{A?Y@ya{cz8&w~{ca2Z9xO?s8bWLM{1^NHc*3^DEWzk7_VZ$?o%i;ix z_r{6{-3g}QM9b(1h{9{eSSu5wJoCQ{8F`u(+*oNtN@$wjFm|EeNp;M}+zLLM7pOTZ z7Lr^lh7nj!+iyJ5!_=~js`S$y-*@~g*CVsmqhOvzUUnL#;Uvfe_&we#A3Vk{nNAGQ zdEI+1x=nacC;nXNgV%(Y3C^UB+PZ!N*LOUh5gmCk(0_`eiz1d$8Ir&nHZezDKhmBrr;2&=02he#a5f(ulh&dvH`gj8y|4+Tgvm;i6=p@L2mA3HU{pfHzJCn{JkvjD`pg^Tr#pur{h6;s@ zp}<14!3+eIg%EY8eA|-)p#0EKssV0|N9Ar0X=rq7NEC}trHxEP`>H(+GVB&qknH(T`BlA1FW<|b3TAp*Ku zJ0p4NgiI;zM6uq$F)~?{wAX>k^``9Et< zmBbax<#a1U%%`6>^yL*2UUJ?Enc{qh1Qus7cVqm?F8^yj^bunQTT`m!tCr%sH^2@h z`hH)8L;_jp{Kqv4y(B%i@dxy$^^Th4CSfTeljWjqU5u~pO=spNvLAfNE0`^hctGYA z6o&c-hSI%QadPWfyrGF58t3>pKH)rAxaEgEveu~3dOPXm2@PaCa)-NTl{`0s-B3zU z*VzkHFzADxnB1k}YlaYd`WWNzhj_z>mJUN`(pbc~u2Qyyfup z2L2E!W-i*7%iv||Pnj-Gg{cX5=+(JH_x+Bvt>>>qBU|RcSia)@vnky09QRW9OQ3|<&N%t z?ivT@kL%VFFqDDv1FZmyW2Cb^X&G3*t4pEM#}341A_GG{#7?_wa&pDW*QAW1L{mjPSUl9VcMpx!GT(BnKB zwLXTvjMC6Ty*{FdE3yuyZi{hoa-HsH`zi4|ik%_*$<84FvF*Y!STOUJnydczW0oyx zdsR@JyK$}Xmbv|>C?a2MxzpyG@}&o2jps(aPBQa4*TeXd^ElrvCU*}8?@2VyhJj&U zy{d!YZ>zI~5jJ%&vM@fwKRO+AY`c0gNd~+9w@2WTNb@5uttN?zOhhGCN@j7Y_M& zT0UCBK7;+S27T5-BC$XBG=P%gp8CPSctG)fWJ8|Mjz(Atg?_qoX&Tn9ou%>Q+FZ!< z7stx#D7uaL=u5sDJ0k4t=>^NH}!n%Io_HG?%5_V?kT3AsOs$SiRZyR+p4Fi)n@2Jg5O|W?^E7$eB{}8 z(XlmbqD?#F(+l^|byA@_ud$tp7I&Yz2#m=dln$>M;#mbcl6*4DmS0!{lwo^fPN)qlho6Ol~{-0*Q+dcJHL8z1t#YH zi#=DS`3%;#7Fvv#d~q=CZiV}8#EsAM_ndAszvG)!&Ep?f{wqH+2tC&$%nuv@sCON= zHkEZ$Pa4B{%dF*4{ES$ZjX=@+#I4>9!|fdrb9XnMQDK~1%MdAw5%P4omB1E+eFf** zQ21`OfF$HDOi$*lAX39+Ui7LPsm6=t)~E5x!f<7X{LMb@ny>dqJ7u6Mp@5JyELJXt zHsf(6eREf0v##F4Xb1JlSg<2Iw)%@)im}{5)7xR!3H(xvOz6m(k7@ka)eGc1LQtAR zy&yael9J17D*NLz5RbNX`IB$83maIyHOjs4_4(LA2kn7j-_6ZQS zoCuM-?5QxUen%UR#@9K-(3VC*2KyxZyPOLJSXIZaSM~Hnec|Nb#j#-x6Klg@n{UsN 
z2)jyyIq7r5tq}Z~Q2OmM>J+5~{xrBTzAS`0yc6Zy)2yVU*{^c?_ZV&%KXy&fv=RYM zmw2XBS%f&Ds&on>gZWhS5d+x`CbDB1#?JQ>LxZ;`H3yXKs!-K4<4J}5-X#5;H)ks> zBl=5k5`GIA@aO)2(dpy4E%M1AmyU)lN$W#ouFgMSOPhGLrT&v|y22(M64ED%4tiCq z0O^_iK3Qk`JcBmF>I?NLJfzuqnmB-bVVU9V<(XGFu}*#eX)d0Sr%`E)?rj!nt5BBMxjR#oA}mn;?P;4yH+&NBMi9k| zO>uF2p_$6zRY|7|`usvA-uCTpVZ#x=vInzNjT}n(J1o^vPcDa%(5*@STt#bjBFTuJ z96o(+m1RFcMVT8C3SC$@s4ET9AiafKur>yF1cVQTU zBUMsn^xqT7sB#==97>;jb)(LhI@i#!tF3Xfa@2v2O8X>6g2~kek47;0zYNW69&(qM% z9%JgtJCW>gWfme)XlR56Jk*>jk_-pkWR%Djy;z2C+i?A*s`%lV(+K_ITYYl9lxLwN zRjIp(5o*rJpbv+IclAPPtU3=xGY|^iFPO!97qbpz=f4)6Nj_Ppxy07e^N&qZA8SVr zBj1SZh02<76qJsEHyn-wi;QEH5d+sRG@I*1&BMoP#+ew}EU=t(9lC0VF+`_5aF z@jkK-PqXzN$_po6PH=8!kkI#&J7^75pWD6vXpo!lU*eJ~7?L=>uFW04^ya~OIf>Vn*4RdS2e-3uwLbpb^c1eX#aZ^<%hWfL2JPaS-4&DL&^PSgb%T;~4upYH?2m{g zUy|x!qE%);)af}Rk|>fwuOrUXJwJu8++PJsiA6%DD*?)!NGBnte|KoMOZ9kAK+mmR*7M-WbJS`BSfECQtm%cABTPho257 zCwzhBU4$FmrUTNjdrgsRL=vJCjp5H)Qhz0v?zCP~H02==2aefQYKzRD%bCUOp`%g; z+EEe|S6R${$gm}4W?f{0i!5&m?3N!wvo#0sSU(ImO(aEb8xbqvnCjsDCm^3km29XF zN##LI*KIn=rJdRxprpH|YSKmQjk#;rc}l5VNoE5kCL@Km$eW5>Rq__0Wcei0vfE8DYn`sGqh6y9VWTZX>oqwLDXKmQ{@wrJ@(nd|2&5lwGIcF;9 z&DBc0gR4_oR(zgGcI*c9H0XZ`pa}+U_4mlK zHRc@_s>{LWTN@lH+B6-{nqp_bA)WQ$+##vMpty|H6<1$OP2%q6OTmK{{do(V^-v9X zE{g_DbP`iIuT$Z-8dlR$EL~E=g(PDo%eGE6tLnbJ{Tz`OfhQ?d8S{$IV^UH+y7k{{ z`)xTBoI3vX6xahg;{y%Rk`%4_+bkh4;|_BAut~o;Cw@91oHns*^jX^Ct>^8V%C6*@ zpL1M(FQYAW)BJ$!PZpz8==-AH-b1DL{msp6WQGw@MaTYm@ATTll7`7OICYW2bHV}m!%X@KGtE8-5z=C968 zq36i7-8YO*1abvRMsh~MTd_PTw$bY8m#c~#-mr>eTQ z165@GKC{GkaKT?G*xXrDq@A8wtt;bA4&wK=lydB^9>yX1u2F(n_YBbhn`>@7#%a}- zq;JTOL9$jnl4JCYC#}p4MMM+vW;l}BbSbY>_+6)F$MvF|-o1V2ve@G|?&ZnpCSi$* z;beB!Rz6jG1yVZC_1CA?WwCrxla#8B>0e?^>hcoVbLiYV;}1>>REAS2UvO>fK)={S z`NKQITqelIBoj9t@uuXvh5O*V2?~p|3qZ6@4P-C-{lG2%IZv6`^8iVtS>zeNzH*FD zG#w#&GYPTSMJVQNYnF$-Z(+| zW_!+UK{VEMxZB%M{tYzYP#I*hAc2MrJYMe2r08L$ww(HZJHZx@wPOwiRKu}dFa)MTh1Yz6?R`f?KH2daRQ#oZWG~gP*6t7k`Tz&k)+m$Xllu~PI7G%lQ&D+WMZG)Z`vj9!)t2pUA2)GxFDdJ+*s*VG zNJ?;TZ3UV9WFU-}3x4p20HDxh{Q-zs_HXVX0* 
z1r9UB6RGiz@>-AwDCTpPvH5Jrd8izdM8xQ?F#W@QQE?@kqFH6%I0FvO9j9W{EH-QX z4VdNI!&xu%n1aEJ+KsBVk{Q&ziSqJ=f@C!i3KO{8!ch~9ImXH+hTDIvG>$Xk{T(%} z`dZ`DqTb=n?593-HUh&{LxfVG7Yl`aB?~}+MPhCCp0@Q!}WgcVo5cxS;-#qRcXo1 zOfmKUmPW%u#=>knMgYDHhk7Pa^t+XPHJ1`mS(?90JA#?U?6R_ z{$n65Zmp01RU*%mktkAK2t^DVj|zKU@Q>p$tDrTmU{ET+KnBT7=pB5$u7c7mh(7WU z?&4PTfd6U8(`e{Zo)s&odB={KXFKIx+#7xqI#V*@)WlYKSG&kI2wvXSyHoU-$b92F zvoqB7Y#KOlB}}c4LT6Q_j`g`-e1~?^*y`>&H*s&iAv_z7%FEMGJ(m*r0Qas!nayxV z_@}W4rVLR@P;L%{)B5)arJQwDI!Wdh%wD8=OS!&30$qPeyWo7;GUWP+AsQ-UWgF_U z1KbOPxu^fgk62+i)f_QRl?=uG;IQ?OFReTiq#uY{k3jk&raK+BjV-V@>BlGHIpVJFslAFlOFEk`O#9d|V^4aaY28b~oN7b2cS(K|=Zb~G>i{z0f+^?bE z1=8at_NH$*&9-d43REScf*(G+6z14VdovD~b7bFlmq2kgTs~{bzBx|bw(@{NN&Ie` zO?kw5YZxe=|se-twA}|tg3Rl;Ya4}AQCgkPo82J9bqa-lTZiHGDAVH zp|&3tBHi4XCY;5mhBF~={Z;!~v11f4&A}j}7U0*rg9hKx^za%5!Z)jsz8BS1fK&~V ze>MkO*IfA9e0rX( zuL$9@qZ<7G+^Z*FSa9ufe$;oPBJd>zvGsKtwdkKHk%Mi< zuO*O_7H&jOt9G%SkbX?K zQSyWk{j{%bx*<{)J{PCM@Rff*=} z8aRL-{lIZToU^m1ntitYqh|9T@v`Lcw?p0AzVF-crmJ^CTety?a|Y><(7zvp`-^ z#-g7{n0@_0my=chPg!g#?-rk`N1r9(2Q(Ju+-GT2J@3D!5rt~I=KU18rb>w-4p!#o3lnYrsQT@ zjWm;XRPSjQ!Gf@Y&-M@X7U? zDY+)naS-Oh zXc12C=WL#gZRd=l&KBc*trH!qdl$v(FEaldj`N3lK{x&e%<)VA*#UukTx)h{34hnN zOVcZ_8O-rlbtx8Lr!UJb%SO|^3DSJD53HHO3Q4Cfy-Dc&5Q%NkImF%rR8hc}ZT^XU z)4h|?a9OXc>P8szjKx!9>->5M>K1PIrHKcdJ%s^6`-4tdQ-s$Zl(1~>x>CbAhs+V| z;)^(Z-V+w%K=TICit3Zwehv%haevdHj(_XfLcFzH8DvL4MXg3}2sz#inbL3V^frl? 
znyO(jTM_w-?On8!+oYSCPc$JN97G3LJM5Ul>#_y+I>H+KuEW3T%)#!$+e`}1mPx6K za5W|+5aZxt<iRz|z?tB}p*^wRhwxnbk)bDGtZW_&Sw3S@?o*Zz=U|H(zQ^O&7EUe>qv%N9 zat$RQ z`Xf8nw>c}!m0T73A&}FcEAexO`jSU_hq|55vFE2objIh1xIwTc$c>k|#QVY-ZUD|O z)ZIMdI|pOP4ZDGS!;Q^M=WN~!jKXSesA+mw+p%0YLxquMKAVyqkVAeOrz0R36ZQV& z7fYKRqImGX=}#53=3td7r%xj6TykC08xfk#pK;z-{WpdF&blIlbqo=&XbIX@`CH1( z#_%z)ur#M(Djv{hUEobI3p#@5nwqzoe}?AMn{(DYGsjvMuc1=Os|48{>>mq`wjTzn z$`f3def1)hI0$Tq(m1BQ+?9{rt6vA$f_ zB-bMvP3QR3FGbFFf`kQs*%9_SQ7p+4Fx&Icy=NO=%-GeNAgHSyF{R56dR}*>sMYW# zSxuu>bNmsfdDtMTMas*JPt^v4V1Pmr_-<9+yD_z^lHpcIo!OK4Uj9zlP=~tj54#Al z#W2^YSvIv%jzO?o74mKH0-xBKRgXsa{GLVo=t%sD#j}GBb!Uy4jx>qPf|U;SF{RTw zC#uudFb>p-dY))}x%QMqIk)Iy7K~QIhIvLA;*29m!LX1`DNZ;jY z4D*s0br>wmS76|PVF}}8JHp?c|J|72z;wp0PxQ4`Y~>))CVHvfFPF1Ln8}cihkbq$ z$T~L8%8$U~K2dOQVKj=CAdF=<9b};syWnxu?+USRHc_dDrnR%Z+LNu`;;R|p{%JBfJb;L`1BYO3FSbL1}*G8BCQ`#YAbr~-($2> zx2>_@8uv*5+r(>VeLn!Ibl5kyS=j5}sk?uSjm>hAn%cdVQCqLuthDf?>V(x@9`i4K}bCJ_~r zpEjBP&%l6dzhwmwZ4&Z(S5z z-}VGJPB8UGZNwkksK36maIqmeV#|=kZw)3TY9cI7TR9tY8{-AuZV@5ZX2L>D~9V0M_m3>S0)cvso%rtD5O5}HM%7w(CmyaT97NaN#Ke&o-{`g2Tv%g_0H9c}rU z-SY$5+KaoRkcs6#YOZpc$dqXi^mO8aU<`~Ot`Gj%^tYxkA)aHEueh8dd=}6#2iJc< zCPonvtbrZwv;3Op_k=7-t;p$)A`D%=4a=ugwgH3unKpk`f=`}@ho8W5& zMaLMdZw2>>hSaagW?S-OgKd%a4@FqH>QLH)&FQA9l%;m0g?UG zRH(6`3hzkgopAcw&tZ@Ws(_Ao{+ z)d^P_eB9WN>RH*;z$D$CbhF=%J`HT=tZ|%xrWMF+=!j}$9KT?mv#)y<#~65g{m;m- zUBTh2k0Za|gu87d#)hx!DEVwsM6>$;EBF6>MHRV9aX<($40kS%v`J>C2yQJpYndON z{dD+B6T^9DqBVMZhnD>c891BboscM8;A~=1T+G2s-Ga@4Yv0^+fS6gNs`NyXNPe1o zP&kGVAmR6$U^4zQ#)8fc{gH>97C$y9O`4BKbRWsqhqFvDt$-$co!&1CB^6UB-S`bi zVtGt!_0V{vSPL~L31Ee(j_MI2Ye}*!2{J3Sewgo6zKNR2E5gp?kqqBY2;ZWa;?(XO z_`P|kMg=CT{`8P^D!4v*_~6Fz%>H9Ef0;bqqiq7 zegNPJdfWNmq{!frY~A-bqeD4k)upc0^T@4iO9}rsSL2qpGy~2v<`YW$u^T#vyUTbn z>9#kVlf|gF|E$im@{ClC)y*nz1v-BU=jEGz(Eg(I@#Hn5Npj>PiWGux$2xTjT#a0> zoBdds;ZW?N$>6h*l^rHF_cWyg)jh}lo&zZOss1y$X%gJ;9s}@;^I+QO$!T{YNwc2R zM4i#3BeI8vI8M<<()amR@0|~R@Iv)h~xtom%Whi^Z?WZ9a{0X0{fwp-a>E;jRMI1mzoyAgd{Z9 
z&Ua-~!f5h7a$lVIZjGBZH<9nTCOeV)8|TcGxf-y~QDKnBjK}4cFPELYPdW|;swv^D z{#$VGkD>gKtB^+{F2YG}>K)x=_;%yg)EGuGz|{ZFJjJ*dw|F>?%hD1;s1dwwlPX*1 z&CQyk-B;_9T~f(-0?sG6g^eMSna>)2awiUIBP+t>PByN_|aIu;6o zi2OjszO*#gKW0lS+mzazd5Yv=#yS z+1M{8I81F)r}R+PNDi_s*WcU7CdgH50$wO%+G>+qUp+V;dCqTH+U4_KJ!9u9307&t z^(7I{fk{W!agq)vWId)Wy{u8gu(Ws@$PoC~oj_arvLS!?fX07D|0hB0Z2W8GJ89{@ zF&gYVSIQD6p#F2`vW*f4aAy(wp+bi6Vn5QVE{{?2kY`eHUf|i!HKQ^}SL|(KGE#`< z_$X~yA#YiCt${J4N+f-&K8pRXn5=`7+b)7^>LEsH`jk%T*6(_bt%fYDHxs?jMCG7t z$#wQj?EwGw1qPv$UD%i^aw_fkxeJzbR#-)O%>;>xNKri_LV$h_Km2brO}eX7_nsEr zbTQn1XmgVyNyR2!WE-$-X699QgkQNV%N$#1<;3(C_rsFMtf^379EwM2h_yhxJ`=TuAXzFv2)c4Prh z#++(tMcejct`TUgu!ITneyZW;s3cg>EIi+=eWGi2wGSW*Iq)mxs4-S)<=Mg~IXVi) zWI2~6{CnrA`&+Rr%&T#>Ule47_2!p6R>P4%T_^J3EBNERk*K_Pw0&2a)4NWiHBnv} zRZNw!LkZx>tK#N#bB7H~RkL!=LA~x)-~XSXedo3c2g?+C34zSlLix^H=rW9pfLue) zWMp|s7)EhYSL9PEgs`omxyhErF?`?7UbOf$CA}tJ}FP6)3)Ui9LCUQ)$S( zE$jH_0D#$83@*BLYzN3^2DcD1;K;XOMVc7Ws_PsPO_E#p-e5VMxjU#bJ|YGVl#!ia zdnYz&l8>e3x%H}%-0s((#Eg<`0s_(92e{>0u@)N9`jA&I$hmf&!@L)2L!&7Ib!BzJ z{Z;7=tS#S+L}t?Byk+b3Xph0~d!d|GWom+jURNQ45S0U4CFS3}l~uFYB%CbHk1@e* zk`ByHcV!}ZBNw6d4*>rh3V_o`@WH<@Ua^#y~IiN`YxZA zP#wNYz~fp1BSzpUC+d4Pjm>*DeVU=KPY4qW5#HJ-M3>tFxEimoZodty@l3ttO(AdL zb|uM`EK-iN5c9h0f_~V1w0gLhGTs4M@+2qeA4WYTMyzpcici)XS$%-=@28(YNOqmr zIh@-AUpw%f^Su9C^vK}n_WU4h3R|0y{@IL}FISZnqa!#`doC`x6BNT@+wWC;GqW_NP@Y4Q@Uyixn$ujS|R1b52pw?zsR zA>q-SF4dMvkfE;NSv9*^cPQBtgL#7rAVYy z=o-y8>)Ir8bTT4~9=N=V33%&reW~JsLJc+|>BWHzHCJ4V=4c0tF+M)uIxE=&k85l- z>key?$17mES$e~gArCF5EY*-V4vDl$t;;Pk5URN`B~Am!)3I*@x>qdd=30vS(sQc9 zR1Q=tytQ-wMcBtEAtb1fP=DWkTQ%nrY@KgS_Zcro!=F=);G6z!svjU^bKz;Hus(^$ zwDAdF17cLYP#96ejAaGVKsJd$o+T~}FD~FePW-S>{K^Kslf+z07Y(vkBw3w3>;B$Y zS)zJMBr+J?l=2>ax?;lT)Xgp-l^t-fs8`z~SjQZ1^X*|6&J?(S+c<1Z*(EhW!K5#W zXO`!$geAm=b@|D^kkC<0Fnu}fQ12(RQr-wCURhVb@t#xDS z5ub;O*C#QJF%$CwRI(3X#TVZ`hJa*>OKusbMDwaX5~$;puA?3|VPl)zno?*HOrsHkWD^N^omZA-N_cQ;-H0|)bT)a?#lP|*F? 
zrJ2pZL~^MPg=0pFuxOD>G#0H3$9E0#9I_h5HQv8q)e7V}5`ssB<8P0+ooE*(A;wz; z&1yv-|EVJj;cG06hPMptE{O;1hFmL_>|1WiJlR=I_jUL8xl`uzy0=Ifh}~p3zCQ|V zRv#xMwx;MJj198Q>ujiVbzeR_Aw`xWlbOAI;qf#7cBuZOhVQL)zq=}bmd!j`Le=Xo z-AcF#?(Dg%y%i`9qQ2F-jB=ofDpJtc2t>q{KZ3=S`wEcfCIhHN;E=|QzJUWFk2e=5 z1Y|ZWx7?_Q+4xjg2z@!fXS-|5WRP|FI8TA|khb1TB&};Gn!t7=y*ZO~?x~jwH9(7) zs^^oQcWnB8O1uX2tXPvS7$ei1?}aAMUTJIWAy@9nV;P4D>E-9;+5JSNoTyA zaB(_H{kfx(!Pa2@aXN+ExgfvQ*OOSNYOiCQ22EnHA1o8csTOXJ)feP)pE zAx?m)7g6+w^X!DgIibjHyAE;+1Dn7qLCRNb3xal>71H{vreNjFUXsYMh)1S-4UARp zAIr(Cv!D3`Kj&9y8}$w7D0WtX$Us;*(46{gWDx2tD$Lo9U!|(vccE%wEbWN-8@W<( z!1Tw*zjj-MO+ox8`15bxxR66NnFgxTr}n_?f#)@~Y00 z)r{qr7a9_hLE-OYM=vpHLoSa)1swF=wsQX&r7}onUAhN~g#QaW#`2}3tYQ!8s?vvT zOA9C8;%~QKNA@9~-=1~+^60B`u0r|J-+VoA6nZ_fjfF9cj4rK-F|dGkcWuyWEZ*wf zVE=lXbjEF>6ZVSt0Mx}07&0e`;qTBfTejDMJ}(C#9Csj;Tc|3c#d0O7T>MRx3jo84 zzy#*x-tjk()btqIE8(DVTEb!+Q+8~3k~W_;iE1E;a zqG{@GUElL;Zmf z8$gTxDKjX@%9M#5W#5M=OzhYp!QD9^D|l6fYSp|%>4Fm)WR2EjqNf1Fw4kLrYKZdO z?iUN2#JmKi4Idb-8s(JPR^v1Si4@rx+0>IAFyJ_CE=pblj~iWOioH~pmRX+RxrAPZ z*>7iIQxe^!3s(xeb_d5t(ssE1MHyY8MqZ^?E6@dL=1{vC%3Q1EB3_}`LB;VBoZ$CL z=0O`~a}utT!&28_-bi2Kkiti)=4Mjg3i|tu6EJa zZ4=9Q`GwzE`C%3PmLkT)USO@p4QS!kN6cS0weoh*%gOhzaHlZ(HuzGb*UJ3)qC8$a z9ja6TDs^Z#5+d91jW~VyhhMJ}Hdf`w^$+qR$dpuBPq{PgdZpeVLQEvBKTi$#$3$3W=|XpuSx+O(3n6 z`M<2V#N79pdL)_rB&@d{xr=UrsB>(P0{H*cfrSn zW=MZ`V?r9B)wOgb%x=4*Sb8WYoyN%nNtM#(4<6}g7s-UK)iQhudQrsSKk-w;oRou{ zc|Nq_&7ojNJiH|N;he}t(#6air^weR{CiYu@3_-0(%c{?71ggtzf>s_c-lL_aFUe} zCyo{DZu;xa6S*#0NIU#V?I#{+RBx<^LYWkugI8l1PdoYaR_2qzSYmJmbBThWkQFB(KTyV7zKw z_StaY`|@CGdyFYhs1N$SD0Lh5_=Fnf+w~t)Vq^`YxLbUIA ztt!xUu)f>a?`}n^B;$G>8o5x_Wv#l1D@|o*-c=!yc>KNDcR(1ENdc6f=Xfzf#i2Jd zcy~NR5?dG!h)d7yn^=xHFHSGjZJ-+ahpxG;g$dl!;~V#>M$|MVTl|70fZ?=5hVV)` z4mvi7s00*=IhMD0CNzH~6mH~7D!Q~1?%;U}Wx!^~Y!mvl&gj)xdrF*c@7yB#t4xDqzO;69$88|LiMk$r-rtDej9>jXFZHNy%<7_1bJ1h(Y}dJfLVeM-{fhld+S<=> zLS>kr0c0DJ;Oc~D=1uzw`u#?ZTw~$Zx68(6+!TKZ!O8V~x&auui6qnMPgA_yWHt|6 zQJiG|0Y{bR$8rif!LY9%;rygV{fATUBZX+QW3-zE0$I>E+v~xEeGYX!$VPAo%#JWiqpS93V 
zB~BaMU#0R);#;oxVp$Q?+u{u!$|k%(8dHl(;9rtKm~PhZaqHE%XS7WNR+eVWlOg7X zVnc~nLr}t?zF`${m~S}g!VGE1w7Vuxv*r=QL%l!V5@weVUB%ql0dY7!LZYvpqni6b zeT(H+B)esHZb7)a(J(w8Z3f8CGDh8Y_Pt;Bo7pOe|K{@?AvfHo;6~I%&h{ul`hvzohRi;l89& zgvfpg8K4vzzc)^eCYkJHaBrU#LPReT+b&Sd{fnwL#;gN?8ueWohvx4Z3O2l=IScrj zKhU!4#!h-x`hC2enk;IeHDU|?IQJMcleRqX1q+XoBpc~~%52Oau314`YM`VBuq)Lv z-Rzr3if4tf*aV=iC!b4xBC?zv6wTR|-M)*Qg@5!#og!J6jQ<;q^G)oz&D?`$4Ho%( zTRSsI3So(R$Kw9pyGs7yy8>yIM_r-iY0tW6RkNS`Rx+)yqT$>mwc4%lpMzc4+Wzdj zQAFGQYI*z_n&_rlT`8cCT0B+HesBWQ`Y zH+%9UJ$zA*vj@tNuaON-6_cC>{DE{Czqjm!tqgd{JV|kHTFmdez)CLgSS(xn!PucC62@;Nt!Y;X}vo)prc(xMxIn3#QFLeyVn^ zH8CnRBF{K_2J!qU>=fUb4yEhIWVx+&^Iw<0mx|nI!^Lmp3;PIey^P!~cP)0(u^oj} zDDzvjwQ{MJ9zMf-Z$&B~3(zn#Yuvgxn*V`WCCPoK*H_XXq;r@_O1m2mi#1 z_{eO@pEM#m;Vr|&2sj#x@8k2m64}zGNb27erU2Rzi1|M)z_$%b5X~fw%vd?CHN^?r zq+-yeLs1UYw2egleZ?!P(k+r^dyjy6Z5L5>+Gw|oA;|T`o5v0(1x8j=!5C4Q4O^Le z;>m=XEm7QV-M{0BJq(Lu=Zi~tqTEdDTjFY)XqpzvC+*!gmF}DBXv1(44e|Nx6e*O4 z#Z#B@x)EzXg#%al2G9ox3U0S&dYCzL(0yBSA=;nt%sm`>Ycw;}^57&mT{zOAxE0*- zdLyz|KP!RY*TSJx5iOH&SwDWr-TsNoA25uhb+tA;by;diknTIXW)my&0djrhgV8{) zLC72o){6XZ?m*%^<7gR`VM&j+tR+xJj-^uxVwac-X6<90P5<2D#8@EDQ@(VIvOGu{ zuwF}#nLlsH0?4c!)*lJ#1%@}Cp_N6j%r3c@C4o^nz6P=dc+d%mra#> zC#2wSxhnr|)zhcm>Yt$>w8*`p|zY72ZmCqan+b`Gq-=8ugINVptsgz0D8yAW5yQxsWK>_LY2T&o6?o$=M^c!${F|R zRRr)353RD7aiP9m8q)xG95Ok`@VBs7t7e4sHGcn2aVS^i9pwQf|D?+AUgxka-s~Ls zyas6B;+K5?DTOmKFV>h1q;Adwxmusm5Z-~^BUM-p{O<@C+!obS!oVj@t}GBXh{w^e zo*^Jh=MQS5TRQ?}0v)sdG?0*TQ4{ioNWMs7#2ih9UCBb~ND>;`S7?37@KQx#V;h_u zqhEIQ!PHEOd6{$^o@A0d0gDrTN(mvWuMBk5b9!90=qW%_{5j;g1c!%G^@?6xtZvGD zo=;%$vl_A4`$kssw1hyXK;Kt~`^~~Wj!9Sb5Eha_*w-0fCqVyl4(%(mO~^ZFi*zx~ z@_B2md7wr1!>4O{bNt?tcY+L_;aPy#L`f1uhP_`aNseO)wEg_a?&3)pwfr|UDUQZy z64Igg*Lhn(-y^T<=JT5A9H`wDg1we&$7|_10R$K02{{;;G0)?-8)9kL%<(`2|KtQy z&E@&@HOH!Wxte=%0G9zIdkPd0(txG1jFeXx_YgQ=!SI@kEE7PyiZR>vr zQ=~QnyRY7RBVvQ}OcP4r6R~(A?|Jg!Hf7vh$Pc~Sm-?<=y%TlSUP~sD>!y9+04arD zC3Cv@>ElE(E)o%{zBF!Op{rNPb$N&A4WslU#cl7Onk4t4O|BK@C3@ 
znC{ce312fHEDn5i+1L{3a-B(em;aX$)2+}8r478_4!7Jlwx`1=%I~#&)WEEFIq7X2 zl^HZ^Qt1tC)$0{*6nu$tLoNV+#qw)F+x|18R@}j3&&&CKrs0)}p2~%rwUA=+AE`fe zYK)vrbkl@^e-yx(+UX+}K*D*8%OC@>D`_yM7WVC2lx|A1j`K(8?YscF)m`uWrz>hx z`B$5_w%NO&25ChO10CvVY^DjFpWMPexjefA-N^18T-Wr5pZ>``V6cq+Y=zMEyrq_> z``3qLwTrIuItzD&wF7;p-?kcl>+f2UwhUsA)OC4k+3A+uL&H2dxi{(A*q0=s$22HDp?kEh!$R|^tO*{mZ>knX+GUY-R2Z9tg;M!KASOhk; zO$H>7ZMoH1!_~)TZ;W{%cx|j#uksb*O;%1x^N+MQZRgh~GRVxH-MFlm_Lp77qLE7Q znmKc(x!R_oy&dLTJaL0Oj1j@#STWxk^A4efD$l0lrNWuroxRd@9N`pK6yBh4YuOs3 zhB0SI64gr+-v9nLyH^&u^MJy%qnXt!6nXYH|5pn=aKk*n4c`vH+oTmeKk$tg*}bvG zXm5pyrX})LY?9Vf8WkSp&@|sBrW+irm?GCEtiP!fXJieFxb_3F5{h zD=yJ~hOYmi>`TC*Y`cI5p~zlj&n{V#>{~HevSlZ0gk(?lZM4XgB^0t2BKy8%oib>_ zq>O!;h{;%nG&3@0=6~pYzwiCN_x<|+f3E5BJdd1n?sK2*KIh!`Q_xv4mRkyPp=yyR z$P&*#;(K)OvC8z7G6@Kl14@>wiI6t52%DB333YHcAH`~`uIG-a;8%mEwwI3*4dP@c zUp5D}kJ&Y3AVTlZv-?$^)j+iF^VOm(T6vKYcYk@Cg=L8w^#V+cR7@?zXv;~+mk_9{ zC$C6WyhE`2t+IJl(>Wa4F<8*j_Er}*OmBzWixNFmm3=*T%i4`PT(g z?&akK>X{P7Vbi&X!A^^|;G3gH^iDk|oysFiR%n>fONiGmV9JT29RR`UdZnXzZ^x1^ zjLlN5Ta8`zS0c98-G7-r(5%#4Y0;^bqcNM*sfn1iqHnP8>lCBs$$FH zogyNNu=Ak%OEUqCC|}o+oP|$s&!qjBdS_!oyFaKm9$y8mB3@D|dXLlkE7X^66;jQC zMH4F*>V~xwOMr773gMw8)3+tHa&`;sM~}n#hO^4GSF-g=a}y)t3Fa6^bRB zSx*o+U~=(YP+V#)HKnF54$%Kbsk)m*F`_YJ9bDn^BXi|%G(StWaG^KA)26q=Ugy64 z&HygeQV*S~E+G_mm6x9f=0eo_P@BMxP~h&f)e>@C=pC(35zy5$gDZqqLmVnPyGeV^$~c%oKd?KGjz- zn3=Xq9V_v(k8MMOb17@^`h1b`W&X4GCU%rZUzcP+jwdFk+AGlx)8Ij#P25H|6y8Y+ zkGNUpIy=Z`8Qj{=NeY!PNeES}4~izi{ZfRSUBpvYm_9@2^}9A4^B!jaWJeCkTti5N zY+t=?!tR&FI=|hHB*DKdJqK)wMwP)Jx93WWD)wHYE-6H;&15Koi+fwi&D~)vxSh7w zt9v2HH7r5v48h5*c$ABsFwd>>#3133?WRGOH$%O6ov9uX5C%WMw(u65O!utLZ!=Lh zto-h1BKp`MM!O=o5J)p^!vrD4iMM&H!v$3C7&sFULQ^iHT2V|(X`BN*XEaC9#bU&& zDI>6O?L4Rhgr%BembTp_TS6LvUVwL>BS&D)1=0xR|fVy82oiV^d&zHxldsC~?h zMUci@U{r8&%4k91;}zJU1;XOwsO^mLzNSw#!g^sm-0K0t>O$cXGDf1^!#8a#;~$^d zj8XJ)xlKGida4!F;nB2Cw+HN;6UC)ua7X$*%h;%G>v;;JtoArFu9*$i`!iIf2NkNq zG{IY$3W*V9t2dZXN4jW@;`JUeVyEJWSH)#Vds{Z2Q@J#PAiPMF8RE9H=&*I+CDFAU 
zN8RX)8v1)xh?mc5J=p!M3&!J)g`eJ}Lvg?g;041PVQdHj_N{Z4dj1-RZI4vB=ZK6gS2g!NB>yR#1ys}w$c9p8L(j%I}?kOgzC)gcE+;orET4v`p2phZ} zXGBry{bGdNI_lKq1q*^FypxmgZ`9nhbt_(4Q*w()pZJ&?l8Di~<@H4%apwj!T~81^KWF^WI{P4gZCBC(89|Y#Qg61^xd6U4J)b#rVgh@#*(*~L(Pi( zBH9V6}*DAG@&rMFMzZ>XC9G?ud69Ld6C%W#q_ngP0cf@BeMW;BY8eD6p zXB)o+zFjv77kFNli2WMI3LUKx??Qaldmv$Z8neK&TlK+FSbqKmY#_1Cg~j)%^lY5W z1yOQT%8qQb`c@}y7d>+776S3pxgvGx9?z~cCV{-V4GVWOb>mog_Nf9M6>IJmdrfVt zG5e^7$>75oV-1NeGS3rW`0ag%8h{pb#vQ5IEZ;~+)2P;3+`MjRya3>REyW)Juj?~g zAjYPw-mQg#*_$1p3z{N*Zr8A@`C!es54Ed3dTc&+xom~wwxR`uQ{dMh ziHR{usmH%#%RW%_ z)!nr>njIX44&v4t%i%>Y-7}3C@!P3+cp7ejdr{w=Eo(~^(EbnFA!~eEQ8cR=rYX|< zbIzmXe`qdiq~%Ec#-X$=Mvu9I^HtX!Kt$CFl;HRoq*zFt5;f zh;U)HcM&ph`@#;_blyfF8F^V>*^|hZNeH6 zB(_&VW>wH6zO?Aj$|O7sh+^B@M&!7^(dcMVSNQO)=}s4S{m8dv&IQb4z>#aAM8FFV zW7u2O6L+4*b{DmsYj8Y|7yq~g$G-fW!>FGwTJAG5A4h8Y9G`T4JDajsF!#l-gen#1 zf(}&!!ah;M7x^rm>cxIc3rTQpfSR|JH>d9R#@=i7YO2-~)#zz{6)1XgOuHv-q(yQw z2o*JSPsC93%PX=GGOCk;QRL0Yw|#-3HetkwX!WKvA?jlWT^Q3u%lW5iYK@u-r>ssg zwL=U-)gITwqT)HE1hz>>^P@uojW6y!lsw!Rt4E}d7rg8@l6(C0nh%T-9$P%Zz3#xO zTG)=XzgD<(f(bZD>!YIzKxTRg&x@NvNwjmin&`*Omzrh`>NO3<(M{Ngz(%;&y5G^j)&3kGusq`_gbIK~ z;_izaSQ~u}W@CdUL1TP;s%cKf^$$jmKYNE!Te7c5T)aAJpUci$)iC%xygsMm#d2rs zA*GjB2WdF`%r5VpiIy9RKnO8=FC7ox<(s6ZLr%+rJjJF`4{`d7AKM3gFx*eW^qvw^ zsp9zUu#8b>+HYhMLxHBba(n!Q%cCVD*iEKBAWVBdsW_6EbN*5NbF9|HoA-^ z^?;{UP$DG{50!1m8GfpB2W)?r@6o`sb+;Cjlv!PD)W4iV^?`|LR*iO=u6~~kl_`3@ zzLLTLS7QZHGPFdgRiq+8dxB4xd)ZpGRsEPsX>yP(jR3&a!)qgYlxnH=a4o8EsS)z2 zPr%uM{Z=zBWqS#GCbA!BvS@h61Oaygu!?`{19)1Pv~Ds?d+aHP;n2M*OY4(&tbXpG zsky|bTB$=T13RfiJhaL(IBCcRyNZ#ro5l)XI^Vnl3;mGRx7L^&gAGIR8BiMAi;kwd zu4DJ11ik#6yzPPue536O=umx*?YU2`8S$Sv)_JM#D;{Hn&vQ}w_g&?|JlSX(C{tK* zA}Pkc=KB8P>j%SqnrYFgI9g*r$xLw^TdK^53b5Amis;53gtSD<#5@#M1tvPff|{~e zyicP(k?S#1dRjswEy`!Qi-P8K@;=y=7m-go>uSGyHH7sRryptovoA|+yBv_{yuku( zkugNG%8G>0MGK@AKYtm_;a5o`==;33h_muJB5lLs)5XkKFn&Aocwp2L8xXNw;M_OCPl!^IM$0HmJr+}{QLv{_zs4-P}xA6q8L@wLqGj<^QEwe1D;!bzkItJTGl8aFPLaEkYT2TwKtI5Bxqde^@Nv&g(+3 
zfuI5J=Xn?YcR`#Y%l8~6;Wqc)!E)p2gZ&8{s4m?7;;YL8s_XYTe567gXxiu|s^aT^ z%`%V_c$?9>yd@*NwENH~!N5=)Npi?8vMLw@ZMG zY|f1dz&hC1(Nw8TA6=vh&%z_ND;Uz7fHBuEfgA1dgkQLg*Sk#s_#p~LnUDb z$FP1ne~ zkx|Hr(+GOB5}!9sdh{I`6Gx@-_`0CJXiwGYBeNCoWk?)o(V@-7`AbIV+L#uV_O9|FrBPi zM#_R={8Ho@cGy--tjnkf%@bI&cr^QZElsR`^J=X+73GkY_0_`aaK4F*TU$*dFl_m< z>odI+UPG>UUXxB1mY3dpV$dGTFmuwp!GXR3A~UL9BQa**m?Xq*T*7YCN$VOKx_vlVpwdd?ly_Wr8S0o>YY*?5m#k_ud}2Udlt#!D^w=7KJ4Wuk!cxqI_IK zCvOSSAeFZGY}WBKFDULLiT`^xCo%8*SKW6FbZIWF`Pk}W2!S=i$U^nWlTdbZCD<4tx=z(r<}z;PZbA5KZd>v`Gx0uj=BP4jWr*|xZa(! za|NHn1nX576s%gqyXChEV!S_|P;gjVtVpt>uXzXKHG(VAsj%*tnNsf5UNlPj?A`Nh zXEEy0#32j)h6QsNzGPlK<}tDBmT6JX^fXha(7Y&7KDzc!5YtlXwE?yIp>9j;Kq%SM z+K=s1?V^YpR}R3Gsar=^5RoCG25)cL2&c%6otcY=ZdvqQvtTy*5pg4Ti&K%aO^j^- z79z9GMLJO@nf7C$o3?-FQasT=mjivx^|O-OG;f=TSJLb8Hg@-DkupRCEb9~@c8a;J z>fX-0ja!mwTjJV4pnIRY%!N+oVbs!gk^Q(rlTBh2kmA(ZW}x+Wbw*;0!paMOziVzv zQGn{YcpT;Pz2xKtEE)y1Kt62uztRz+>|Ya0m#?zOawC?(Uv@_q3rv5G!vh1QSLW}V zT&m`6ZvPSlGlyzf=Biqbi8DW0?(&Mg5YqR}R-ECkW-IK3gUx6UFj@?7N-ZLFeMA00 zH``5464i(Lm3m%2Ufz2j1N4&<-+Gc* zzW$ijKL(X)#NIGN71HhswrM$jm**?^ww)?Qbd;4CYX}xtzb-cKC%6@v1lsqRP}ZJmhu){kI#(ejHDpV=5-J12F0kkIk%Mp%@$ zpHQ&JC*Z9*HHDc;Q7~@W{_va$yH|k^V*-q$U*&3n3-hICXwt$%E)`+5+;m?LixDZ- zrt>u8CUD~OLDBc=@Kub?LX5oCUiHND^PP?Hl;R1Y!X=0<5Don3Kq>Mu5|Zf1V4u|O zt8PVQvpT016K|VK{2{+Z&g=0$ zuuwrK0r`fsRp=s2PP^QO=6o;|Q48Bs#hO4>vC7m~iCC|_!)-ZZ%u+*i_&@leq>ET# z?iP??+rDxvDki4*+6?Rc#hO!R`bFT+ON;2)=zUE@&+$5*k3nIs0ehaE5>>y9F8a4h z+AB8(nyJ!D?D9vGvUd1w#SLdpxQ3y^U=O)T4#4VD)O)8cFcp}ek}tz=G<9A6P#2Tq z8&o?6!YQxEdtqoOwzZmvaqU80^ZP5?k``@Vx~`_Cc?8zgxQjA0(GMovoxet$XnW7O zG1+~7zA;|Va&PUO#Hlbm{Ea4#SN2}i$vI{FcZT#XW;FVi{<1#DHt7~{q-Z?~;MC9Y z0ZZVEOlu!=rNGpQzKJz1?TzN0T0^ku zQ!H!nb*wOZ>4vVT4oN&vpf8wgOl=b&H23^y)*DS$!=T1)3QM;;fWcL7Oa zZykzH5Xtc{1vpymFiRrXq^^LH<(XFAf3)42fu^GOU)c;P)|^ zAF7-?g5S{+1++F&+=S~WJazrN?$_?2sFX}0{5?n8X9!w3MnZ{SI>%>}kNDE}34`=e z+4oJFp5CS9y) zbbxk7Hsh;8m5=h(l%Oh|X#M%jY~3r}wERNn zEE_YF`K}uZY6@u0`xD`}uG04ppG{Li-LhCvT7=SQ*|KAc8xwo4GV&M=dUW5JkbI(C 
z7@@j;+_u1IFu?R=zN?qD@CH^sZkTAg&?UdAe%;(nU;_u z%^v?mn&g^vK6sBz^c`Nep-B-ThdP_?t#@Z;1KTJhZIoO!wpxiHFjLYvgEgJZpD|n$ zWS-SWpz!^xSqz-KEfW4SXF#O$=8(I!^Q&d^z5+;8VhH&TN9*J>?d}d}2-yLuU?9gz z6rs|MXbs z0y1Dk)CD)}r-yg^JA}6p8*Ar}(et=?j{>1Li~Cc`tDR~1{9tZ;?k**QW$S6=*==oG zIq({dfLXNXEZfAxJ)Io0&*aGn!}Mb|qTk1@u1cjP5_*o~l=)_i^2}83{MbY0z`aTW zXi_whh<&&IPG80|*^7#xQ% z`mF=N7``?E75W|Cdb#WGSzD(cWq*#)t}`EQ+fLMw%ed*E)lPX1o=dbU$Yx5+d#?bE zInO~H$TO~@uQvQDGkLa5@JSebeUHZgbt{Qc7fpEtfF4cAxZb7Xoj-o`(|xrEZK@-b zj;S@`X=pcB=Q0%w+eNiuVRpW`3%*v;Qv(?#57(RAoED4%o)&)iL9Rp+XPwTpSaQTf zcl{74z*fW7^6UzW9NCII5`q2Sog0NB;u7y?!aUz)3;HW83y;s_;l){!kzW|U_SC4w z_}MK4<$tCPk_+dbUvS^?EUFDne>3*wk1&vu@vURfr&#vkBl1tdkyju0NiB6Zh#T{1 zP+75us$e9KMY_6mu3o}N*cBlxr;}QfSU3!A-p7vT8V*7qcg2nLwiiQqr%seTS?$Zi z2hq9G>ec#Cz~@ZmFzKmj(94Tn}_S^(jJ5e_py+fxCYWm z;b=v^4*?wYe3lJ}V)htv_fWY0XL1gT5q@>1fiT}NG;k7dOv}<2cN?855-WgViH6)c zaxJdxRofRk3m*&K@iTLR?2G*N4T#h1F|kcufeuPJKiqxYh8A(O?oP?ySD#ip1NFYv zV@Y9>A6U^vm=ta7+B-KtjRgPD!z%+_o_Ff`(h?URr^m!fj-a3T^)s&Ekiz5Vu1sU= zP|b0)sTa~QnY+@v?acizHZrENp6ohY!3X1fQA5cZXALcCrE+aw46i~s7v`&VA>7I}rbwK&axz~DZv`N#d=w=j7#rzRl(L@BMWqo*l z{rs_ns#Vncku0B)j%ji^ z6XK0($&=^oETAS|{F?i!a4eX}IaHgDo|Z{*z+YM#BL%*jU@`mgb09rNHfviYQ)V)t zgZVAg2Q^isy9GpJ9V7Y7=hNHaelQ!@yL@KRFGP4hSy+$C2Q1W%TM8nw=en() zO?!#bNGClCkDH%dIiC*E|w`MxuKpbCjn-B+@pbc;D zJJC2ZIzL~+PH?*YC4Xkc7N&gDzmYR7yyjcZUA>kF_b8Ez^qq0v7lx6`k>R$RUdhLI82x;49|JqtZwV&RvD%@+MwPFG&Gnw-RTL58f`NZkSWet^Kh4cvpH|NN>B7G-?-cS=fnkL zmy;DlrE+77ONrwXb8xsBl1%4o8W5ejM+)Z;JGCHIG_{=q6Ne$L$K82&z`j#x zY6sA`vc`tSmD77$C+b+vyh^ZJrXHYiprKSer%-){(wuJ{`$&LMRnCun46BVir1D5| zhZN|g`wpqk7pRsL9)MhuDncQOj}xL4wsyNJ7GfK3ea<69kiD7AXXt2-05 zmj_OGv3i;LS3D2EC2+*A$HxKGN@FJP1M+8}{6y0K=c*~au;HgO_(OrmHXSdLhQ!%> z{4abhUJ1!!M_f$1tyrxFL=zn?3ElLL`tTm@trJ&Hg-2^|O6 zcAUvEzgj6Tga=rlvKZRvVzNRLdns>8!_~s=#O53YskLBOHQ0H~(&t;p_#j>Eli*xIWwu07!+f{>R5qp%HW06vNX=0# zy%RcBn<#px)p`c&h|&j-6tuI0w3ycP`9A&(U?L&Jx)ZU+qH(L@^YkenUJcQ~>F(M# zPY^T~ya;M2v(b)lt=#OA$*h;zj$?}8w5$r|ml`_0cP9!X8K^@l?cqK?GDQz=A>Zj+ 
zdUEuYYSAUpYn|V76x*l##`_rf-Wz@Bt9Uh&Ln)v!(Ff;R7nBxYef-D!gHPAdY>MTa zDj*)KV?_GhetO{;dE!CjLuTBA2pAH+H>d_Xb+k}dE(Y%N#RPt}QmX4vG~+~aIMeu- ztFE>vOT}dQhbm{9k8gF<=z%?xZ-4~nWoZfY5gTvOZau4&pVgXRqvZegYjSUe~e9X zAQ$z7SJrJ`%6bQ7szVTL=rGwzphqWNFXu!2I8EH@t&>2U6D&uq&yK#)4gRTcRv|H1 zl^5jlqTTJ3$m?F%9LxdQX|UeT9Imj*oO24SmAW(7tX$_v(eBat8nVj~s0M6 zq~mI+s?0iD3iyiJF;f>*&EbJ?%2B^`8cK+54+FV&$myjd15S93~dj72fTll3RM07Ew=VV&k>FvIW|VMV)YaPCW1W zbF4S|-cvYnd5ygKlwtR7xS9WvXqp|3y6aG_Qr08nSe<}?w@(@NvYu%bH9U16^zfEiBJIrO zpg2pY+sOeMUba4UC5i9-RlTRth0Cn(v?M@W9P=To(l4WrZhlB=akzy48H{?9)Dm?G z|MaFQrTO@#mT=hHgs+Ho990-wVI0|DQ0CTqs|D2$k#i3&9p5P5QFQzbsk@_AiXnt+A5;g{Of%x1|D7qY7Ucbu?7tx!1(n!6yFxCjLX<1H@n*B0sRL!s$-pS9YC^ai_kD2n}<2?wn z03kkQ3K#B9FJY^B01rvG8nc1r9LFnjg+Hg{%*C0k^yD<~*7@p9<6y;#Nvd=c_Y5Li zPuJPR69%^GPG&bsls;*wfcV6=Wey0|8J@5>64ZNl!1fIqEBhf#$oAKUku70vQn zKQ%tAn9%C%b_}n>2`N(57x4Xey`b2i)t6$%Bkp2Ywwipz7NXuUgVVkyL+%A{C_Kz& zY=62V5G{s8Ff!TE?g%afT3|vL>Q}xhp)}mI7Kf9jmtuNU@89V8EXTli{@B)J{Euz* z)bOKTG(8egvNu^8SVkQOHYI;->n-4HpG?BexuryH=7nXxW~XuP4sy+6SKjGEEyI3q5Js84-2K^IxIvn>k3vcXS=GlP&gR5465h-+5=kQtR=S?py`I@6d; zB!**;F7?2#L{^T8j#=OTelAF>)YJ#i;1-cF?Qx0Wj2D9^@#9DLWZP~J7ZNf`LI+Qh zhnYP%8VEWS%E`i79BT%7cVhVKT`G}ZZz9-;d2|faO;LId=_Srhyp!9iEbPGy5t%~6 z2}wfBUIk*qYcDlFun{N6sWK%Pz{%7=Z&rx0=<-#GFez9LA81dkcpVLUu9!->vY_fh zWtXNxkxy1xyu}j4SyH=v0#kP)&BvQ|!T7BkoEUPGg5w)nUfs8}C67H4v7WjS0y}1R z>c84Qz2$$`p}badcmZ56O-fYE>f~bzhUO{TvPSuk?JXT6<__~2uf4jo^gjQ>8jTv) z4#(DPh~#QUSO1SQOW)nAbVBEzp(R`I`$^Y%7m<|I8oj)2Q4L8g_TafBpw=y7Xt6Cl zBVs6mkb?Onnvg%7)%IRKCmhsA1O7D9&RE~%F0}BF{2iV(XAbB36yG=NozLhu!E2}C zB4BqrJf`#Qp0F+Ar4Dq1eB z?d3H3Lgq<<-d(u+30+^#$Q+fOU+Qo6Km=b~*C_XgJk#4_}%S%cgX=qBO=0 zq}j#F!(?4@g@p1Ct|hj4h4HAvhhU9`FE!5dABC*3sB zGR5nbFa%H1#e)g|a_wRfY9!n(#>3n#-=8jx{wq~cqK~qD{@WLweD&iucErf+{Qb+*$ZLKr@}-X({h|f7{ps5H*HZqZgA9wPcS!`( znXI)}+12U?HK)J-@xPu}0Xfc>xxS|O)h%z=TWFpAo=-scgG@!EWS0||`wv3Qf583+ z03?u<#&8xx9=?s$ESNaRYyiK}%L{@XZ9u!t&Hwr2U-;O@DTQ@mdcj5u$uc 
z_T0fpOW4p3*ATtN)SN4h*Z(*217}B)o+-A;14UDxu)SJX3=xTs0ASUY2WtF+XsIf8UmKb8eWQ_Lp$x4Th{jL?C_;c`@Sj9m)3@IG!~ThTXL(8zA3y zz@0UbYlyx<>i)2Om^sQVpQ--r_V_or1N6Uc6O!d&p`>wh;4m5`R4IA&sw-d)H0mVl zd@0RZbs}}alpgd?&A^BkBfq8(*8+HCGJ^)t==*;eeAyb^4-ZEU^%Gg`pVzPTIAhv2 zs5AY)ru{;WOHj@zNS&++eh$Q~+{Nnrkbfla&xd#R=qDJ%yFbVO8cg!ZLbIk>2FLk* zDnb|;3BMg3KA1(&SCKN(1PV>-WNLr-i2CD>A=L8V%7ExYb+ltlXip#TZ*;MrI_iP5bRz0PMYVv+;J0bQ?xJ69IExj_0>0{e!=YFHZen*2-N z3}@Y$>-@^@JQ6K;0Mzk+!*hc{_u+B80}g@~uDiI<9cKGSkJTT4Gpeo^rUrCQ{tt@& z<9QBH2m62epB85I2lbo(JJ9m7->~C9kdIpKJH`Ka^gqC3PPzX^ajZVwawR+(9}0P29h!TIlB;U51^ODz!g0(M+>GvbOnb5 z=r(AbF;$6n~4c2bArfo`zeDy^CZ1+Vx)=!VGo6hKEQu{N;|`lN1sPGW9x7$ zT~u+k7q*+v?Dj|j!mG}I_umBmg{lAXusnE!X7_B3C2xwy72!-1BYcEe5h7-4KPe5m zPh}?FFLQHz@Wma!hFKkJT8hV zidur&1>{`yY`^`^8Z85h<@Tc-ik;oJbbpZe>Ni{{zf?;dOW#?X(5+8Xc&LRIPghWN zb-#P-Nu%m!eNxMdUj^d>P?)p7Gy5fMi%1x5)UwH|ZU}NZ{>cr2|4@FB(Fs#}Ai?@4 zQU7`24L-pF>||_(`IS-I(y#!Bb$pH&gDgRCkaZ$O-6IBN?8=3DahB>28*}i-a-a9ho(y?Q0e0Dbz@%1n^7_i8fGqg~CN;NaHa`I1bz{=uG z`){HHo)%7OQ-da#!+Zumi!^D-NA}E$V&Ytzl?L|9S$}>lSbk#wOB$G|#)hwV@P;d& zX)UUcWpqp%QKAcvySVsQ>r+nu8_Q_HT?|2pO=;5`SB4J?icR6m!w-7r|0E#U^*0v4 zs^1hA$?eAl${Ibh{1f(>+ix0VGyKD>c@4WZ));^`sN@hkif%>NvfBJ7}HoGvwQ!^`w8_$ ziZ+X*-C`TZiS|GiSpPo%N-APl?L>>$jZrHP2BJjjF6d3K!@c5d{0^->8o^5j0{3rV z-7tEHGL_L%(l&b_n>lAj(S9RC?ylDjOhH%07Y9e}}oO-e9$XY#& zGJQ6P7!nU`i3pRlr6y!{Vj(Z$N%1rgWsuRaHUuBa6Ll122KRZv97R>>(d1;b7yZC4 zGaQ>FG1u6b_(s<TbcE2 zn?A03yz&ILz8kVs%v70%NrV=q0q>>Vz!fjrObKUYxhC#jUla>0(hr`#bQ|7QIQ$B$ z2P4GCiS3?fldTa7{ge8H=HI!zPjwlWbmDA>+_f{VJG@w~7XzAa30kOmp#7OJ2^3n? 
zP&C$up44ds3U5*-JGNavtbQL}#BR{>Tw}Wo?BQp(wOY8H^dQIL+&kls77`fNJB{DY zT+km7?Z57!xO6B(UL2EjuU=0V3LP}Q%<+Oxgy-qa3m+317jMhUJ}Z)@Zcx&U+iO?g z|3nDectyyI@}H0Gs_4ELqS>>pfdHB)^VDYdv}ut3Mk?uFnyMv$-XecOQQ0{PwoqHWxjS(~olgqW~Mix8WTQNGCW z%}I>5ja-F*U5tlmu&3WV2FDbIm?9&6DkPK+(KspI=XOu2A=iCTP(Hv@cjIyF5#)qR(vL3^1#{gA1<2Gzn>}JWxeUt)MHFl?ARHIe>3`A&}+25uK^NNV{#^z^PnA4^qn1 zw-xm7E)-#YIcK5S((Xq`ihZ$r=agV@Fph-TO{nWF=F#z2N;g5OUx~erXhw=52|RDY zm@79dW?R4Do_-ZV((~RftjQ0ZJjG|#RUS#^^E$u>b2j`Z%y+oDKp&o${%&43PVzzOcIP0!fwc4v#r}b&p%fULh z#~80lb>sShao#tHp}0hhxshX>H}3hVT`b1vL+6GyvQfe@I2f3i(B9RX+x|6^R{XY_ zm62C$Z6sM~Q zBfUuS+OZfd`A=SHXt}Zod64UcVZRkhrc)cNEAys@5jJXuZ@J6k+T6?)QuMx(-jV3A&*bVWpQmz^)+`7F&J46doN(r#3t(kM4Z66$Pv?mcU!AM@cnkcuB zR!qb3wdQ~j&*qS?If6E3m1Iv^Qf=kjDC_xKmv9xBxzpbo4?FPS1&B$z-57hMi6y?f zBGMUGjX5qdp6}dMwmbZoWP8H_HqT&s+4nk8p*SBV!Ul0k40GS6 zw#vEv0r!U>hu)~|T;zDx&ur|5$^k8~^?#xaSDZF=!KZ~5Pa)P4D$OCK{({S1w;)=wUFamnww-Th%}1S5pBT^@~VHhn2@&VrJUAu8lF zS4Vm`$3Sd3(gLg3S$SN%Vc>w3i0f5;>rUDH{x*6GLtxc^%Zw96o?=C-nLk zqQ49X*3^C0A}YK&1ek4*-F+=h3-w#?o%O9!dCMsgAw!QfmD3Bk%kw|8s-5$}o+#IB zYDXXSBDG=`xbZXxU_yo3{${VU$1aLAS0l}8Mg%~h)e#SoW_6p~(r-A0Q3fZp9y+)z1o|A|J;n$rZ#0@(Ij2-M zc>oV~^vFMmeTW0D8xvtPT4>qT(C#~=L=8#)l8!Nd=zQC{n{d6CHAyqF+BTlGq{piuV;ZQkVg%k$3F=(Q&hJ&O{(JJIM zO6+c>O?ja;hzn^`6TgNHQxpC>KkcAB72S#!aj`hLvFlkC9kpMiK@fT6!Iuaj^b0wsLz} zgC6Ais%7XU!khy$K6rh&0P{9&syVoT)SEU{ijfU2_~o|-At+!@|MTPeV$P04G9YbY zbded$XQC`aEJDn2QwbnJ5G464GY2LrDiA>>(p>MFZ=Zea=(&}yU!pTc^$+rRY4&p0 ztv8Bcom;V3L9rk?;pQvp%_eC;u4aufIxzt~Su=hj z%`(9SU5LB8EvkrLa_#B*iHrQ70|xFg_R25=m1V|l8v;nIZEcN~b$-ucp{v zkF0Ck?nqLFzOC8Il~`I53|Rg#6Dbr;IW`-Ci@SC6y4E55U^d|HB+r0v2=- zBUf4I*pDOHrp^a}Koec75~D0m88TzFt9m!+5fS(ja>ktc0Ss6EiOz4d|4k)?=}SAN ztU}xAhcF11GYNPj-@y5&mwP_v_}0fdX<4ZI4mS?yWcZiYWdBZH8gB;UY616udOUEW z1+(a*!U8(s>-2+~cLtXnSE?3T&G|Gw;;u$aR0$rYIyjBIqjzJz-2b;C*ogN6_@n*O zKYKB+)~3gKwlZ>#4-Eo!w9XfCO)y4#l;GbgPP^gog}Q*3zro_uO_<_kG}&GFm#T^W zT}`s@pRRPN-Z1Z~+BI&Dp^Krk?B-+Gn7*8^E$>#m(+z@c%$~JSj~;57h;lk-q24mp 
zZu79}lLo#syd?B;$h_*{%!-8GjX3pzR|kxQ9bIqyJ5OEtBW^Zh*_&6#Z>C|MBTL0@ zoMx(sxM8H-gRQDVi`v@6H7(9m1Hsd^-l1WvB|mZld*^0v%0O)QmjoP_HJ4}!J!otS ziDWYxO>z8i5pPjgW$I8AdLH)!v)1h3H3|d=5r+uTokAyWmLmo!ox6*wgE3Y{l!BML z6MUY(>`d^U^qSmnb?_pF!NwLcFL^3Gaxyx4vFFPwD| z{}jwL`ys8Fp6j^yv{7|@6ad+2*9esJ_w2*3d8wsj5qA^!NO8&|z5Y>|I>LLoL;ptV z4Sem;80M7-ZkvbY%&+l`vHjf;0AZIpB%oMjb^y77!#zKocl%^@;Ha*ycNl&ciSmN2KkUj zC$4`NSP+Fi)eO`Bg9_=l{}d{KKz8F~AKdBW``-aAtQ3NOeS?WYK2eX2xsHQjUcaJD z-iR(0GFUbfcMLPe)nIlpwLttnUoBf(Q_1Xem@D!+u!6aZfpD+y)L=r8`I#>R%AJv{ z2Eop_dkL<2K4EKt@Raam?|TMW{- zAygLFXco5<&U*GV>`t7m`Q`QgWZA!eLb}HOB`fJJ{wrNxA+nWPiWb^2_c{udK5f&S zUQ=C8OpBPV)U}zUJmJ_yEb=Mz#r7yJPEtZ2O!nB#w%q0(x0(jd%Wk#GEd|~;Ij8-J z(pS00bbur2pIig5z74^qR`wLQQwTN#Hib_Yladz*6_m@f?cXkDZcbqu8FNU>7%`?V znx9olRSzcsD*@p|4Gd1_-w7sZXCX8U+sqk zVt?hkAJ24|*|1c`R!`m^b#&_LZg;^YVdyQj$hk=E$~pDLuFO>uQXc6t*oG|w+`V>A zkjnvlSE_&Twjj_4TyMh3tc*${+sz$6rq|j7HgZ+6xwAK-Uhr0o+YqW^>vzE+(^8T7 zPMVGFJ~$)fIG`jfm1VZt)}$c@fu642o4D%Z!#!1zHn?nzCbIhUnk8;4VSH%^(^&cK z7t^XlayfzJ_TN5h7nv*YPDL%%LTlH|>R+hU3uP~r12L;(m`BJ3+-r;zvQ_}J`;IthsAmno^boDW|%6C5vesHRguo+P7l+d=}x8}Fw|G23T>JehC zdj=uFGgru1DL52V8?`>MdN<+Ob9L+c%AIBNwKXrh$65amd+#0ARM7Md2M|$Fuz(<; z*g=sdy{jl#L8Jufh*G3WCsY+sFd(ARi=dPwBtd#jL;;l&5JQK65uzawAS9F!cu&x~ zJJM){_nc3OR4!~K&j!i@8dJlt;p4V}T z?>xjVKv518|J%9$U+nnrjsJ4sza0242mZ@}|8n5J9QgmjfyDBAe`Bcd;x1M`hdp^)2Y@M{qj01sas=f$)c$ zAqqum#DE8J7j57J7Os$D^LT~-2R)_`pQY5CRQxhEOunqU#28*J?`8zN8vV|*wA*t< zlGIOWp)64z-=}jg{|O8KM+Y2pv`e(G?>_z;?f0iRwDqgrX(6K4?An6iH5T|%0h_^~ z|MBXdUy(LI54<0Q;8EmnlxLK;_h*LDyMd491*^?ntCyBVYb-@9iy_kHSt1oD|Ga{; zpL$HGq>LxpZvz&m#+%@25J9w>T#)d;5l}05grH~1H@cGRss9j+R_Som8!#6LQyk_3WPF+^RLaYBiF3en zj(!5@$eJ+Az|RkZ|K{i&4F!;TqhA5$>o#ndys{ZsI>c!ZS|LZsxUsC;Rz`>={05xu zX;mUeCLk*`6CGI?78&@H)5Kuv>~7jnXry@rChQUe$2-Rh!sWWUhTvy61txqT$w)Oh zPi5dON1vr=%E)TcUl}+WXx3_B34qvi1}w?u#g<0sNjj%>rm+pY!g@9MP#VMsk!vSW z-PW+>=FZ^l;ECX7{q+k5J64_&nmM3y6bxX^=i1@l5e^Zq?3sKD0x3>2Cv_WgH`~XX z+DGgMcsHVQc#Lieec&%~54pls@x4UlT}IwOz<%Yfba`{-)ILH_>BXE-A?*5%+s4DK 
zUzCb_^ML^#ApQ8KXywjbtsZttZJ{2^TD1r(#gnYsQKyPMIBF=!r{zObUxeGgrw zN285_V`h>o6pR6Oe#j8P;q!M{5Vn9&erU_*VTKfF^ z0uN~L)`owFLoxdnqX@{vv2GjyfMtUN4x*&zw)#0Um=s{$1TK+!k)wE2!6cMk?>Cl^ zNm){d*x$?o?zK|Kv~D74n1ZDIq?D|Rb3la2)5u@r`Iv9|k6M62wIDfCDgl9@p#}1Y zZ-7TYpe)=NLa>^~_$a8F8Li(utZMoUp#1?|SwB-L0zo5&^KQ3bxp1SWTIzhqbXHg zE)vecfsuuK$qR&TO^L0PgksJy1iwurd%l7&BgT6!Is5o6P;}RA^Xv#N3)kKtfxN8qy=Tbqm*yMdE(n9Yp( z3^3*!x^m-RJn#WV;d#`=$Eyp? zJXGk~0#7<-Wk3K~{tFFM!F^kl3y*=jLi*oFt_)EEVchvfo!Y*j=o{0Jk=lQaUQ-en zX4Xe#>!>X_1R68eDNckWO-YXZ!03wGMHIk>ibL)P+V;zE8U?PCHHR{n&m?&uD&VPz zLYGhuR&muzv}s*vG6FuJzROQiZ-!?JJkvj(P#%>)i(JLjf^N=+db$nmO)4+rGBsOtiUf#xt_oWYa&W#l;u!J`?F)c$wyID0iSx@6XA z;MZtE=6gU4gLXL_euq~%7a47Fyz^TMtW6tS6}gh}ciaoWl3o$Bzy+Gai!Fm#Sy=c6;rn?GsjL>4G4s&NfvnO;z;Ds)?_30$ zB|cm0xlLVZJw|uuBH6O$y4V&hF#9V!$18sM^ZRFy4Qex<9_r==DZUC-quma$3$!Qc z%qSodr*-r}pb8~Cp!VKyc{h^-d4k8cP$@dy+vu92x(gH?(NoJeNOT+JXvyen1ywck z<~psywZX^bjRk98c9J>xQNx-*d^}7Zt`kxXInRO4+>-<#&YWzkpBy4+5@4KhJN6d= z+FWx@jBHg5akW&xE($Ef%_v_2?13G@szJLc%q+0$BCz_v!KqHxGIbVCk%QS5R&ex@ z+O9Ik#Z(8l0bH_5Q$<%{oHB}N!k}GKL+|sIE&?w>F4b_$lNcRt%vwIPHrmpdSa%cB z85#+!4XFjvb1w~8kX5_OH{RC=d~*Y&mRJfrR$rR392{17d5-S|4}e(^s}NASGg~AF z{+%SCbv0UrVFrv8&F(Tk&I%PqGS3s~-dLvv$ZFUZxB-ghHB+Sxjl_dvRl8B$oTdJQ zpy;aMGXN`su0S(S~V1iIDruJ2rh>BT-`zd)uhOqa}Qpl@&HCP+XkxE zg+Zg)ur|&*Dz4eKSH_Js-B$DkZz2@`=bqt^T@#{we4(E5?Fyh;z@=sY3^LE-(|aq& zjRg0P84ZPS$PI~YAdHJ($((-RUY0LM@ZvKt!o9w@3U8uaj!qz8f|LU;GP=CF!#ve3 zL{z%vOb~iE$IiN6vd0>*!2+@D~a#Z_kl49s_*j1w#zXa)9}(zUVct_WX1;wR7Bn#A2_0n02l3X zB}BeYAqjwROQ`$EF`$~Dl*3WAre#P&(Uw;BPvlb~y~goX5DOpopJF+$0AM19m&*ilHIgD$JsJH>p>*t>VPBUE zBuT&xb-1tMXWNK^4pmMfpK#cl)i*~OZ3~?`L?D`gyNd#xo#i;QVR}v$oL@}Su-HPX z#5+UfC@5NWs~6(Hek~3Serz&H!j?G64SXDzG7rT}F9AaVLzy8-SV$R2IMjpw*}VnxO>T)dB!erK`eEz6Wx< zL4$7r^#MKv^*fil<4sINKL$0NWBrKb-0LK!5Nt+lZ$J+Y=yiJ|qp~4mI$MAP;Wnr! 
z*pEeh%sfp{eX;tF8p+55WP$8@7vR%rBhc452H9q?9ZY`hVhy`;gL|!ST?*;4Ue=TeeN4?;nEAZI=iR>ir5pIn9<@cl7;P zE3--jz6Am>$N9Rg_w@l)Dy{>@96jqpDbh{!kaAV;*Y8Gn=gMrk=+RTQ!?^`?pE6Vq zAcg3z-!GUyvDjn0UjTv)vIu2hjII^~TO$VHAa&I#29@b10l(k&0#+nu%MO5W_q!18 zcngB&><}`9@r=wdz)TY})!EemMl~{GYh+o(J+M3U#8%Y}fLUsvjNpZw>CP(E89rbh zZzA4a7Eu@G094Wau?$Z31GPm0AdG~>utv>#+f!0)RTf-O{Xa|t$YJ&ch1$09xdJ{@ z%zOV?pvhm`G^gBQ zcWMB(bXWVPwdGm?6b7%(nX@l3yv+C>$U30L&2miH_X2w>Cjp1+jEE423DK{HP1*L0 zRPaiA9&)@`_%-GZO7iY=`)yiePamT|5+Td2z zB#)nQfU^=r_a655Dm*;&g<$Hhp$7s@yN=(01}Ss~-|r^@uS7>^ztt*sUwkV6$0gMH z>pcR}O9(PoFxip^;EUdlD1i-K-uyuUA|Bgh4l&??BF|xZuc05z)iS<&?Hm_KZLl$u zNws@U&94&&f>XL0x=1$0~Al<%-O)@cnvU&1+VP> zeSibHh(v3a;Ti-Yt)@oX5(@HrV-|DNs;R=%mQ-sfDGI__s&o|T4yf&Vkz*^Nisk{T zq|R1&4p%@Y%T2n%I3UMp(eDrY`!T)(t9lb#o#N2LsbLDkLSKB@vH@EHjXcIqM34|U zbiu8ldT{$Rp^{t(`yqx$3gmR8aDh7e&tx)ft(xA3c?LGDN&o@dfd-qv^I)iVXefh4 z=&RUr+L2vnvY57z9E+S=63U$ZY+ZxKLFVXR9txlBP1(tYe(@hqCUT(U7&v#^?)&1G zUjU2ja;zluuDsq3`*US<|#Cwfl8tCf-rWT=6$&yVBRmrsHDu9eZa78i`(ZA z*YI+|3Oe1Tx3`w%`e{9sMm4p(Uj>wRgae32vIE|WhWTC&G@ikqs7C?O*gLHtlo6*n;KIOWOCn3dA{fUAlYcY{BfpH4ryx>$ z^(~s}wUj)hIFI2xIRgVDg4Gf{#0NgpxIu&6{og9d!-l|l35ddJsj~pmzfTfY%jQNO zEVo1${}HMpINp+6#SAY5hFMD0p=oWcnr>OJybU}ML@ud)q$^O0ov{SdW4`^ zCZG1Jek+6-T#es~cD85&C=6gxRtZlDb|;gzBCo+HDtx?$lHlCzvAT;uj?tpn zyxmyzf7LvAKCvT!#adOpk>f)$C_mdO=ayOk?GFu#0Zohc`dD{@Lw!j67CV{p3P_TD z;53YTa6Ia-wg1~uVa5=YtsOg0fVhbB zD@IC3xlsGE5us7P+T?&Fh*&pog{S}QicR}R#DTPxwRS`r{?zNTpILAz>mM@^ONA`6Gsw;w zh>63GnKa8yFTFP)kgvy|2%GaVX5U=3Vsq2l5G9U+7Q3<#rR`*kA&8sn24;+`-($Se z{PZ72Tv1lA)cE#~8FH~7-b=Io9o9$cDk-;X`i&f08S2Vu_PXIehX8V!%%}Y3Cyi3b zh-Dyz^4t@Qvd4CM%~?hS36%e{>zw~&jU-WGfT|Z$LIfO*5GWG>YEcJiH4STb0wV$~ z7+)xhlsrTfTu*5$ovFsNm9E)pA-&27LW2Vf4)%c`IVSkuF5K(NinV^gebNT%Xg;b< z`01N9`8YK;X`$B^x9iAb*t&1lnk_~*OyJlY(=)PFh%ooS28&U0D1q#HKmeq8;@_ym zOAzo?t_C z`3RqZqtC2|etdZzaM?H)M!wxb-#)@55PwK#V+<~>@JuXJo^0uZ9KUI&^hh&ib}|bQ zg-sZVvLUI?z8-p9aa7t;zu_w@I}-@dFmmo&(LbjloQ^I$%!r|Yz+V^N0#X*93pR~U 
zhD+Xxa1P9)Jb69N51^-lPz=9zuub2B=CGpO`Bg%Ucbnu;NcT9p-X{>fsAR%pYyT!y?n*)0nQ&CUhiQ6RCTnT*M*D+q;n&UcYt` zY9ECJ!b(L2ZA6rvZi62zBv$|>S)&-)poI>*!6=5Ui$qt2g)rVyx(;+<3)fnoPHzXP z9r>3qm_B(3)a}+;K}3*6uyuR3AZnu^EIja|)|Mk1Jqw`j3yk}K2};23qw}bjllYjM zSW}IM*rj9L)}m6@{%<>=mW&xhExQ9Q(k1iyRsA&vaGqr6t1!2~RnL~8{c|?D5l%V< z1X^E%GXod9P}{Vz*+z*V*c@zQiScr&e=QyfR(=CiSK)_ntNlOBU=N?L7L8yRRA&6H zIF~`2yit1C`DFz)u>#w%kgip$z@e6v%Z!CR={^-}RTODUKUT4ded_HjA9${51g(V_ zj9lD^S9sFT1=J z1#+CJH441I_|>*H+*`JxJXerxLAx#LQzl>HJrpgv%$zW6ytYazvQySATFbrXO|NfT z_9&CQ89~&V!@31K>y}YuBN+9=u3|Uv>T9mGx7i_q1H$9XRG6x5KN&3M@sijp6SBeZ z2+z&}dthm0<90cAY`Wn#!0)PA`~>)&>Oj7uFs5gLpL7p4t*e0Ei2Rx#EjJr`W1yCN z#VcvR66BZ)(vAy zEOyGlMz4a4V^+Xqc>?~-vtzCrcqanZT2z&(lT8?$XBSaAz#0P|U{w{b!n_$H1Q9jG zIKo<~p%axbsF`?9t&Rs?Y#|LHY8Gk+StMJFZu%7Vkn>bB37!{IQa{?%`{5=RtQ#P?#nh^L?Eked3FdW(^Y9d z0()p;f{d)_{W%v18_2unsv-?CA1e;?1WwO+1Dk%pSGK5xa#wL}tTip!Wb->q4#WnT z6W;@Npxks6dLJmtv~Qi&w}`ltmXA+>Rtw`dXaMDtHYns zydq%GB)bpd;7Qq8_h;-6ZFOU-u;u*O?>%Haf~?Idtpx(d8gKMjk0REBVR3_kWBKi) zLmJ6iMGM20B>ykJVS()6nxHSSSQ`MxrK${xQh+V#nqjvQktA`F*)+oj$jada0%^yZ0Nh7)V?Ic}UMw z4Ptvd^aj2Tyl_!PxeL{WENzzKCC$2vL}STiA1*aL%W?JIm>#depFSs>d>q3o0!yY5~Az2%|JBpf6GUqWNs zlyCYh5KgAtfIY;RxNmw6Z^wCxkJlD0&QYda=#oMHj9`L~N_U;NexdVF)z?~7*_>XX zl_d#jDYA_A!e!UkOl6xT5p79Ixiw!yb1U0I)xIfI)u-cgPF{CRZ190*rr}wt?5~s> zYrE9A0`7~e# zFsAx8kz=6;NiQgA;2V#CfTt?g%C5IE??eAt$=WuOu6ss%1W`;XxWUF-p(nO4woOk5 z@h-}RmM&kpXMtWeTSa=d^A5XOwJq#gAb#f$UeNRe4ses|O#-Pcc@D(E+JJbRkc`Ki-`LA#`==A%gz=5@E@+vE*UOVlhqQcsn*j>J{7fhdVy_zoJkQADsIDe z$|`=#dILtqfo?(1sNnmJWZmN5nJbc}Iks$DMt@uVuse)9%TssR4QZ6~T>}d> z1NP}u6z{$-(f__W=$5lr`nxeiN_+S%QpE?MIY`cMh(x$^;nTs;1FhU3T>s)VnE#im z+>Y_l&ju@QfwxJ+b7U}UWzI^Xza$vyR@gi1;-)4@B{m}fr)FKVdlmQt>?qtPwv1bk z@CSCYYKuwjnk(l@UVUoFH?eH#gB~}V9glA3t;V)P@V2e94v;1K0h-tE7mmtxrtTe(gz>kb2ICPx?a>|aZhPt&b$`8Zxtf@t_+*`Vs zHS(c$PC$-ZGDdC~6)I9)mhuh#5<=DIG~WZJp-qAwaE`(6pX-m%!1M;J^TKF-E)SL| zmk4yRR_jQQVc$W}GRl5bW?}^l(>)NRh+6tbEx^DHvEHwvrD4NjH0Uj{WokzboQQ}F zp`7#n$VNa^?4_dW*Am@7ayjyAVitx-v9RU(Q=`pcEiEfKZLGmfqRe6>J==LTqb-zZ 
z-t{O~y|*#EjE3B}RA~4~XyZo01MUj{3x!t}48IeGc4))f<(}~aTdOvXGeWE1kEo+G zv#la9<;~3cYGbn^5}Xe`8JX;6+p+SwEv~|g>Xz+;tu=iTt~^L6nWc9EB+#l}bVDGk zkLCdnB;-}au{LOJdd)Qu+Wo*8d@#DX)KvYK5W0?c&Q+T;n)7k>XTUo0}%JyWOV5i%7*&Nhnjm4b&eZg63!|U2+XLlAy3r8)N z*A_S2Th>n#i4+GBnl2~j#>$iO`b~cyY$75?U5>1J7p3o@eETjS9Hegk6#CuzkKN?W zWZCr1*Fv!|O%lC}SmofHGg{RMn#U@6j3k}t?XOw*4r_wXkXHecX-oD!GI2V|5yCTv zh_&6MaHbf!32EBsmD~R!t4Sj%(;rC>NT!SyMaiS?Hr{*HUJcD?XR$5x#?OrQd{f%^ zu)TAk?%s5ISUA#jhie6^w?Fck`rRCQb+{P=e`8bJ_Djg`XU%*dPkmy~;59`CbO1Tt z7gXjqE`stOuL?WY5hY(+w%`F)T%PS$q?wb{2ST2a%bZZ^Xg?G~3sZkLR%uSRJLjks zkZ-Q-J?`&pikX@^)8o&ieLLPKp#Zke&Pfp=Oad8Io*GR_gBPux9yfeBHL- z%^O^{@&0#SyzC*tyj~zsi7xYSE&M>e-!1Ed+$dRN?n#=$wYdU_Kv~_pE@(^2mX@Q} z(rid1@8FJE-g@8qp22w6swF&CqA#}kNMv(Wlx1^im8zzFBGJJpyT*Ju+bkP|%a+gx z+{z&~7ne^`l=wGu)3hy#O^QJN)C_j)>TAjs*ijWT)5<-dL5I<@axT_+ zLX((?hk(t>a10D~%VNM{z&!7^Xc|OBJ+pmWu|0%iDZM+c!KKi14V|-q5rAbFaRODG zc`k%a5d*#uhw+9&Lu`(@RWH1w$nrbrSDNlv)Pt#(uq*^i=P=E_(hCgOg%oryDZ9dx z(qu7))v7S<43m@h7CNz6O!-EE8%ho^U~Ii$q<}*@d4r&?zDs~=YtrMXy<7^ct7UE8 zDje)~3HYWp2W>ESzR)$gc^)incps(i`<1BoxV-w}(^VK3*zf>BTf3WFg4& z?MHB)msC@C`>LmN-cPbvo|hm}04`A4jM93LlZy8rKNtXmN)B9p2_#CB1l&~|*(gf6oI_`j zW{vr#ZA5-p)UlxzHf+xzXr${HqPXk_v)F>%4MZZql2UQo%;wdmx8r<-)k1UfU zJV`7zW{8wd@d0PiA*2WxTPw;k;?5rWcwLw|Fk2k*vYj;TpIfI`TnlGMGgSm^&J5Qg zv%?~CEbM&0I@ z+6h@=L>i&q$XVfMpBTP^q36Tp=%e4W5?)x;>eb-Epuvd%>Ws|w)!J9B>O@rZ1iSkb zIz#n<*#%N4(5hZG*-R_jfQPv{3;rah)v=Ed)K&V`rR>;=D#c^ho4+r@N<;<*TqdIp zm8tCz{qJs=<{_POMi~zgU0y^_g6(wefLbx8EwR;6X2AY^IoQLWBIXYE(|Yj`LC^2P zP;Ue6+!_wo;a3QBLBcGOz0@Ot(#R z5T*$C2t-viGOF5<+DLU1=LGBsmddW#G24i8KOG1J2#)gBeVq@qZp?YZ3id6#dp^O= zv}_V@+4ZgDMl`2{vjxk_KtDEq;@ZfJc$LWFIh!$*?j_qx5EWkh9f^twT)d`sA9 z;ll9d9vHEqp{6)wo&h7)nOI68oC~5U`A$JX6u6>2$m=_kIAJI#$b#r ze)y)P#CJFOg-5WXwFvhFMIXWYpH|j1J5Q*y5MzY|5hnB+V^eik0;-)XG7X&L;SXz! 
zb5-A!L$xJ4SEz}o+b8#vJ{x`*j91;Ip@*sx_FZSTZNQ{e`uo%prSAVGlc8dM3KqN9 zQ(#BaHXCF7yedAw2%Bq0n z>~_Ns)F>;_Ja=32MoY_p4>1I<_1jd))47pw}O{syD}b>X{%U zM?X)d$4grZ=Jna)HYVj^)9>3+KlB`s_Cr=5n?hdE?QGcWmX$}bn*bWw1VQx&xb$v8 zl>`jx(>SbLWT5XLa1ynqo5xr%s|46nmMS*Dua=tndUeggT)J{;b?J)xs2Ry0H#GZY z)%K~01(r}7Sv4H@8=}658n>;Kem-_+Ic- z1R+>m&$UVCMnDXqp)~ADVPv|dv*(^9t7bUZObV#l$oCWU%x+iNzk{Y5 zcvu!w5%#r3qiC%daILEIMJ!En>Dhul#~*ToJVl7#236QAn%$vkhKponkqx#W?9(@W zZ^oB*Ae*5YOCmugUs;&a*&(4{qx%1~=l1Ux)Jz}DpeghKlK zJ43%=eZ=@6HNkAG!t}eNL;qq^UBodya*5$S;7E7|Ammjf7aFfxI74-N$N)1-&mt%^=tL(Zm9voYPEij#5+oSws3OKYN)-Ry& z)S6&5W7l)#2a70Aq{N0j$#tcq4Ns4^8Pb|4;8PR3s9VlZzdt0S-`F{@Sprx9s#Qp~ z!+1%4bw0~Hitsg0Pzo7Y5H_&;PF=HK7s&)cD~Wu!I$g_+i4gCjWHQ66O);A$VgvH8 zA23?-(w3@d@xk7<#X4{NX79>USFypQVOmt)N84azWS9UQIbZ`OA2VZ3WRpj3>tYwe z)A{PCI76-46e#8!qd!XwHWOK@OPd8dm1nRNEQkoyT=oS`Hs zSRFWfoOD`h^D{8nkmJcH*>5m^x5CiHTlT}B{nk+@yy=~W$5v+2YP*7h?=l9;8i8Uh z0G(myJ_U=q4vhqURuN<*6fU@!3q@mR&OAC8}sn0cCixL2<5 zN3OTnH#~y7(s^K`ZcsyymiwlPFC@Hs!>0V~JotFYt)F5e+Lu572ERS< zP^;Kc|IUc)sqjjx?HW8Mszj!Ui6!~u9&Wbh`=?IYmOs~z;RJhVKGnUh^yOFP&+8;s z#F(O5?=F)`kK0p%<<_+dHx2xmK1zFq4`GS($8Kaj+GUQmdrwOuX#hXQo6dpen9eZi zCUDt^`4`afbF6dxQQr=3%ygZXTcZv|^4dCySKKp}I_wXSi!luJ}iMZAJz@a}fFAr{}VlxiHG@dHt-Xt$8$T@3EL;(F42n!)l&hPOtaw zKS?E@nPwYHo=u8KVA`w8rg9zMthTA5KYtSP%9Becdjfw-nDqnbS-^Ilo8^pj*Ab{& zRM?yS>%X|w&e7H^!*Rbgz4oZB`7}$Nh+u9T-aM6Mpu9HVsV|3%Ke8{#SQclTbNw)@ zN|E2Hnp*}R9#e#hXt-?Cr(p=MRLc#4_n&w-PO6H`ZDF$5 z&MJP{N3F&NdXl`-c8J~Jn@sv1&%a#h4IY?0<9lV76jbf+qwRk0d*oYyE%Ga-MOL`O zmFkw2R@d;%{lf=P2^J|b?uVR1MT9?w5;Rb$kD&)IT{;Ti`#tyJwouFDc|>X~blRZ6 z!tw|~XEq8a%|Blyoh7O!H<^p)RzCm<_;eW(ZGS{HL~Np`QR*QT;YdltS^3ho6^05AFA$D)D%bUTn*88-wVk@vI zR_kkW=CyA!u<|@&l1$znmKUPd;-Bod zJc6qJA&X1T41abna%pbl-z2;5V~jd?`wd<_bBxcM;g-IzLG5l)1D-#xX6s$Ed&-^E zC-RFslAoILBfAbqz$rv)*Kca`amM+{x9@NF<7K{5!tpH=(eYSHG6Ex;7y%gE>EFJG8 z=^o!$fgU$sGZQkpd15pEKIr%?T|DvAG4P3@O)X47i)%%Xy{FhDx3soM{_1G_0X?F` zEdT2&%lA6-1RPnqVA-VIFI)8BrK3|yqtO(z>k9asi!R*KTv6v~JmBvWkIZxc%agX? 
zHl||u+*QSzIJl#%gYe-S?qr)%S5;MMiLr&7++3oYt_^M~Ei98>_FIs&oco^?gWZM&NWGJM0{ z+oQdCT*Zhjl>iMvhN<(v;kV-H%7C0F*+qvQEurPJbBua5Xct-hfqlGZp03*oSczNh zn6m9oZd2r)6~19WN`JUvoZPp8Eo<&nnYjLfy!R+m2n%(l~bLYP|yrO=L z-$C>-&;!(jV?ii_ zITp$XU5qWev#l<{aqsC|rOet34wsa73)8fAIW4^MF>Cb2yE)=<4UG9GzZx!R zuOzX{A4h(^_iEHWD~jMK4>GwQ)V$YkeGaEuGT}@J7Z8@YeCM>UMe=5(zzJsjw(=UI z&|u{U&;iZ$$DzF57uGYwE=%)8{v!N&ZZ-Vtxh}qp#7};x z3#=p}cbxfMRZLwjap9G9?lsFJtwF9QeXj<4v?{7V_6R3;7|6V4 z@0_@0TMH;dNM!UU=$W}~&Chnd?f>FjGB~$@3x0Bd9yFYnbu+sI9i4MD$w#umKf{=J z$5YLR{SK0|3q+B<2j>nwO9;tJAyy^*Q28-FwRtY0Kv%a;!sFN7|xxFvFow*nA%OoKJ34Z=bMx7LBF@+CCHDb)CT6BOzq8P?EYF> zKAI346WB|0l~ZYV*he6K{VFu!Bm@qUn-@$F2KlvQCov7CO;*;=P23G=n!0x_psD7L zWlc^HaOf^yfP;0~(kg7vgnfXIm`UhSy+jaEF=_SoM1|?EjRp~n@va6mDC%O=$AMef z1F4s8+D(o1h#6Cd{dm%&=VH@t(%8(dTM`N$9l+;4-gG&@@lphi=`=eW z;ibLByE^gRB2V#gvCxIvj?dVyPrir<*z$aKmh+zO5z|ig1UgB4bEAuZNlXyf_S!I3 zC+R}gafP>)L!jI*n*Dx;o5535qF**O``pU1X0sYTVG{I>NY9+b3$N9QD(c3gJ0QOh ze@s?iFvp!%HqU=2gAASo@+dp7mmZIu9U0wA7}u4J>LBzP_R zS0!Ld;X1`9vGz|{yYzADCCl2nCku(XL!U6c$|WK9CSthRKIDf|4j-A=tCQ(EI>sU5 zHe~ANA2*qp8XFUJi22nMe?b1z3NHX zv*@0&@;HY*j-DTyExO`w-YQe=PODG$GOFe>z8;z++`*hVD$)6QcZ*)GO*HIE94v7c zCMEU*E;P=ki^+Z3>ZYoQXe9ehtELkEG5Ymv2zWusoQhjBo-5K)ictx8ql#gluGZcS zm{=vb_)ov$AXI}W=N)U^?FF>(E8j$zfM*-E0H%^{v>ScYRyF2TPVagh|6!0M`Q)*f zLHo;BKINBi)8duV^^TuJ6ZReu-|s}Oi7x?NgAnb&5wCVyff|FztD@D< zFy;4*!MA^1Xjyvg({<47aKmm%wRJ7zz``Tb52hY_O7+Wy{h~+>PY!gWG9RU+J7;oN z{zDr{w$$L^fl}#{`SL+0)+64Y8qJGM<`yyY?EWrs`Qn@FNZb7Q{oOYE2-Z)Z+U=+s za-+EhOddN>Xo?*B$h#uemwW!g*?VnAsa`Ad$@{$^ANDCSPoYyMXrVgsHs4_6I)D!kB+)-t3wu%|FpQc;Tpn`u@#e>LGvVl}`&E zcct#QYthtLu?Rxob@rrXS<%h1mxXT2vOX0d;7|0FPUf9Q;i*I9O1)!uCk2xm4BzIooBBApi!v;)0{;#GxC76hcQZ~!;9GKW3JWdC#u2;uybni4Y@S-m zYS`IY*7TwAZES4ANWw4m<=acUF`_)Saof-?#q(oZ5lp+^xmd7V?wPSfCfT**m%)Cb zVY&V#Me!3QC;fK)zVliu$Ka`~?}NkE5Dz2NFK=U_(?RPrG1x9-yw$EUu+*(;GhNZA z5hnHdtyuP(r!lvg_F@k+s+t=(W(-!yo3RsEjqgG`H#Y_(tI1c#r0%l^!SC7cN6Sx3tH`Lbb)jd6FL)R@CfjE{NRH8rJ0IZjNyAxxL1|>uzptuZmzlc32H`r*RKlc 
zL@t)lsOyt^H)jbSd$ri_goo$Q+1%pi78xO;nb%D4PR`B~2X^LlU0NOqx^sNqK&ZxQ zDZ*V8F41^w&AVM>Gz$1$^yW=r{$Cy;!47#wRc|lgd){0YqSzd#<-EHmH(~6kE+h-# zMQ~wx)nsakPQTW`@psbk1#F65@}(&3*cV90%yma?QaaDgrf9g;&Xs3=*M0V>aFnVP zHtCjn)=-zm_0=q*tHoD^i&{3(aI0-2sO8o2d(56m+HA_2t^fYP)On)kg$R^zi__!w z;Dedw&7vAETDKD99zEg-k}m8Tqlr1Y^eBohY?yS+i$j$uLPvm5O+pUl@r5lDkExJ~Z%Zov2b%ADCVu3!&ED#dMYMM4R!dt3a> zyMk`m3B3O7+2Xtr7=dC({mn@|zM~-)jydN-c*ZT+)@Y9hAZ1D?U5?|%JB^qSZyG}((B}~^ibyg{=4Xlu{TnO+fJ=ni%)rN?5^Ea z`>r9`CfXxL|BZ*URDj&nJO0^}^+U5+_t@Rw%R0YDzNeZs933_blleAq6DlrTGsvBe zZBf!Nc5vLr%w=Ay3Wdl&DyJ!Q1;6crk-X&xYk-61rDpWJKRi+TBy_n(^xx;}fMvhL z!(4f9NhkM7tARUqw3MsFGc=gzop*1Z=}~C+(@GUQrnGM+SFicx=GPW@Mc=s#1%dB@ zc{|RGn5Sn(pP^)Z&$_0IzzQiLLYpv8UVBVeQ|b1|Pye`qBR^q{NZtkpt}M#l zkU#7#_|`S+={Q}WxKZpv(;?WM9N_E1m6_TH$C7J6)fdVGXWpp3J-RD(`K~BjI_QYC zlnFgYJ(_IlRcvDB6aM?^uV8o6q*i8+Cv9eV=Kxl{TT`ak1j4uGeFF0!d(wo*>S(damGIpmYAunbv$hFYvgrLz=r1iY z@3p9h?6paARohBpa-oAFJR-iSq0mM#+!9gD`Ff9qGr4|&$XDE&_7JLip-V9 zH6I>zZuM7rK4`t~$_+k8k?)~ti!ZMJNqGXPyVXHjl>@q~;lY(-qdY-pH+XqhRCcs( zKQXiti6*h?eenb3?dl>{4f1@~KE*;WHt;WlkXN#C9fyS10#YYC6u8OWuGu5!W`rVg zL?@ZSmlcx6e7GlP;MO34ufJLG&sQLx;lbCTqEo*iRRXHpgjZothi>#0y}`D| z{7fbU1I*2CzIypw!BFJwtYC&|l`md;M`BJ|uS-F2^Uwl;d-vzSp0;<46dmT8e7}O-)z;IX8}}e8@pzdp{va-q_u0D|U;M^Cvbfdf zCth*RiB#co9~uXnu^1h{s}Fs+WxxM)K2vzJ00LwrOT|Rye(TuQ8tpcUNxRa1<6X#y z_jcUOacBGd*Zj*{i0(t#CbM%Fx3?-7wx8Xxa*`&vEBvHu9%)DN0KolXy|o5kd>u}& zP0983FQVhbYV>Sg;f3G*|A(lz4vT7Q|A*=BE$2$3F2Kn11a z2ugQ%3_W1LkV6S5odXOozs)(%_kFKx{+-%;t$TgyzIVXD|2CCht|0J_8~B4BZdA}@ zae;FkDxxmNlj@}za6$2Vlen{E4jZ~9=r1XUCZ6l(^>9C`eg36@w3);47t}uq#R&HO zl$OTLLBOu+S{t`HTNLgQ{a>H}@aorw(0o@K_OSWXQrZJXT}z2zsVihs zFxi>%CvqZr3vYufwY%AA1 zT#M=?;jzJ}YRRs-zd6*75B>Tn%4UDtCj#|n2WO4d7;}puYKfEoWLqze?Z>pWDwWt< zt3Xhsa|0JYhF3T_qI|sX^X20q8bPmLTZPEee2z5D42xD*uT?6u8-;y%JzW@U^7=uU zOD*H#0iM$5nI6mLvd(@>gEr$4Y`U*uhE(2@ zZ-L{0XrZIZKl$)sUmk}W{#riX&>+90-&+2xrDGS^)AReW|C3#cefMVGN6pOvmA)_v zvm61JF6l4lfQ}S(Yj-62YZLF#<5Dc?`lY#?VGn!F-wr_4GgKQjW@mCMxg9-O9vymd 
z_E3pF+p6ovc#D&JWi{)|H@9p0?Y!D>kV~M-TXS?m%D)e~8WNjISefx!VXvH{i%!x_Y?b5q{ zd%)!FA3xmrU941+FeO&pS55M#m~7g_tK&tRKPi{J9)8a4ijSy)vClMsXM74X0T51O zNxhsyhe@Y`EPli{vZ2f;v#NB~DF}(3!MoL-x%Cx$rAVfM*kInVLyN4Eow2xSn`26; zAWxmA*QZ2Zk#@yaXD042UVvWt6j;ZX02|#8#q)`qYLnu|$BB8mwg0D~?DwXHMq++8&yBy3J*_9<=VMwY zpCE1)d~3yLd8gW_y$lgvrLa#Ux%Gi$yi}JkE~;I@1)ZJ%Bh+EJadmEK!h=yXY$9UJ zGHkSGOT?GJL+5wj55>7X5Fbo?!$#v4@R*;&k8N~_UzE+^V_<_wd+h7sL(?p(c)dg({vOueUTagK3?iPu`PBU-H+9O>(jB7>nW`T5EzL}%IOOoQ}$k-b;!2H_&4UXf`m4zl84wG z4^UL|iacC)85w;RnAwj10J>%@ENzaJ)#N(W2B9`W97Xxz* zIlkyjV*LZ2VYEp-SyIk8y2>)H-inspXL)jhTD0vIl0^ldh*P${*$mzsnr2;Ql-&=@ z+r%<7CE5Xmx4R)NxaOnnVmLnZYbIg2a3XC>s3^qbvu(X{2U7tkHh=ty&71WgLFSe+ zzi$JVkAu28=S*^aBkdU$pbQqpDVRIb^|ZA@M=FWU-)jZ=XRm3$J<49#>!}>6zK1i7;>?A?Z4|H>r9->$bmw=*z2zet zuI$ILk%dczb8lKqDu?^ay;phPOeQSiGh*D3{gS)_6ZJIITL$ZmOK{o9at*YVOe+K_ zLPi-422OiTcYJ<&o|d!?xY_&Bv^WfTpSc*lT0V+A9?WOH&OUaX+zD)^Gg? zGcrAU#v@-M8!^EWOy!I1OFh;GPDgHsl42{xIKGh547g@UPxC9c^Oy%~S#_U2iovZ> z2jJd#zVtZ$xe1-y$X1July&St8Ua6b=ZRPN< ze=S}R$BpfhMLHWBUMk)JZ;sqffE#Bu|Fb_{B$2FQQy@AS%tYf-#_b%wbe|y(d?!&X z+~qK8?&PH;I^mP@r`@Sos%hHPef;nrNA7Bx5$}iYxu|)Xld^`TW-!$}Z)4Y$TJMyn zgoDS4trH0;F9uT#A+o|=$f)q<`aX#1_>Ku}`?#~i>v7>a-+Pvu_%4`u>8!JY^X zQ?j1g0t=XhSDu&REhV=1xeld5zIhI^7DThvIz~Z6R6?YreEwjh>5Rdz)`#;4Q~h%)4~jOO6!62 zEA03B@7Q^SvuU=pJc@%bocY8tTyLSPOwRuN&Ur-&n|4;{?K2b1if_aHydOlh zro7YyX*DhH_QqlgW?)4gf&#WXj30lZM}?+Brx^y611^E1W1G6c&@bF;9tAc;WU6!yYNucacHb-e)lq}x1}!9NK9y7{=$5-Cg(%*q-J0d#PLagKy>}$ z3~uVZZR;oZDLW)L09a#ofX>{t1W55$#>)%Y$gC&5!PBT2}vI2zI^VpoYJ{MEzQJQ&F8@s$T zhr_pG^ek`lk+bujtBuKq;d&-zw~uu_`MrolJ!u6euYPh?^L8(g7+IMB5gzKU9B$0v zQzqc3@P&OZf8)KfdTFK8*I4Nh$bMFTE3yvSIcbXv*)2~T@xbVkMxnq#IEo;Fke1@FS4 zm|suf!ocZYF(|SGwd3W@tlD|bR4sNR^SC&TvX`~@*PzJT-06)QWo?!otqXRSt(rI^ za+4m*_Zu?50$48;J#2u51(oS{3R+`@eg!fxl$<71YTx}bA4QPS>FRu%R48)({{j|c z#lma(B9vK2H+gUOQpEPU;x7WmHlyC$;?>%Y-Brf?#9KJ!Yk1?vON7jII|~IH7E~14 z#(C=>%Zp(rmavS84(V@M={TAZrU^U z)-zTnG?ft|Z{0nB*y^5$Q7p#-2eLBH>o8+re|6)z^LJMgEySjH8s 
zSz%v=x_-q1JiCdVfQpY=t=l6cx5{ZmShkoqbVCewq)VzYcs0**bngzs61@xRBS5Cc z!lwC$O8}eVo#-FD{WyG&_g?^%QungUP3r36kveJ%{X6>bx$u%Hi+d~>Ls)!jo-o@$is__AS=BTH4E(+tg=Myd=^g~QcZkqUwn{l!) zjBmA#;CWUdQ+_DJ%(BtMt%yKq47ggWnA`o5|IdbnbrZU%4LMEIY@rUW7Q3za;BQmK zU}#hMtD`sKMPzd&&Jwjl8RlNbjFwTQ!kB~?#~8R1qY7#Frn=(!w6(Pn`;IvvJ`@$c zOZh~@yYP!h0CCqxT?KN%J6v>kr&b;B9+JS3G;M( zNzdpRi+0Q)6WLd!m3Atil6Mq8>C?X* z{p_uap6>b`aw5VcX+C-wbz zOqu~;BUXIwXjvM!*W}mCjRHv+%q8~mv@=bO80`@}!9cc9;|lY=s(GxK*f!du2=VLtj$AHVSA&t(C^;;!1!a;BFj<+L=%U^@z9&xH zH2RWM%N1o9B1L}7(U3u!Q40I;)so(^ofXMkq%tz!c_lzZBCR@;9Fw^a_3N=)moHzt z`Da)c{R|RG!35irV+p)h#LCRtr&pFA!QVbk!{ViV5iByW;@>9=FsBe>%NywoytMT z<&9geo<_lVv3Rrg20~V$Zi-E`1!^9vhadLh|I-P7NVk&jbMth8q+}^uF=@LF4|7u; z)wYc;!`N1FzWX=vejZnR_qetbR&} z*eb?3<-~>x#uG`cP;Og8Mc=ook%ZsgiL;HZF))YT2ny7aZdkU{)zy?xIfyzV{OGaB0 zUik#9j2-1PVqO?ZGlp$gY?VFj&Sr?yOd1wJa)e($6wg9^xu-zG24F7g@pX8=f4~kZ z;CWet3^9B`#)Vh{DY*M!)cScxb}K_u(lpA6&pgQD`_R`QqFpC?qdXl1{qlo2qR(Hf zpdr*%BFd0K^_)-};-d5P^OzvkAVZEs7 zg$10(i%XZ_=LApKlb1tNfNfO`W4*7-qS$DKdNFe4r~rd#Z)g2dP^( zY+7?uOfN)S{*}{iKS^??h8DgOx#q1Xk^4l)TNL=KGGLH#c^i%Y+cjq8N(y^8kj;Yj z$D4p@8PKHf-7R0lZ<4*v2Fmi4F-pcgiBE~^+ILIKuMcnO`cBJ-x8}mjP&_*Uaf1C?u{O0$Hc~q>0od;7{%&F4=;t*Juf0{ ztsj=js~z*8GZ&sgWC!sKC4Jv_hvypCc72mg1_;ATzp2n+S^TZDKd`9o=ZWIsb4O)S zQsLvPC2sy!za%51hHN+35^Lv3SQuh?N}W?YutuNE?>=;%tM`PdMVUFD1~yvS4h8y4 z24#Ths>Sx2b@Ow{FEI}-7z`I*^Cl?1U{Q|sA1)Yt5~u7_gh2yD$Jwas4*kN9??h9N zvW1SPh0XXj$;*3n_D&t~URx|dT34rw86Z;E7aFmlKAKS#0j8HL4F=jdf{{2Ey@oG@ zA+W;9qtcI!pBNn_bYI-@*D}{WxJHFBCCYLoM!7hXu9mr(3XCJs6i1vAHD35yoX2Be zlUnaRnXO{v_Th0~U9NqueD~n7Ssx}4glO=+D2_qE5T&h6-K5w-TPJtD-%66jBoYJ! 
zO=wPwCJ}lw!5J69=fa;0k~tUgf(1DV5Ca$N3R$cPSmM`Vrgle+Cfhz-$o#IkcBpOP^W#; z$YKs4ka5ota=ZIL31D8-z687`Mw({n`j!DL(73QyyIfx{Xt)| z_rem@jAkv7R`|1*<5Uq(27r7|9L`UtPcPuD6aowV4^x9*X3C12W8zOko*4@v;D&l6pRGO_5F~BQ=ybr{+vszSq26%m0LRH?Y-G|`rg3zF?-j3JXZ?x zLX;}EHf&0+pwW^^UfDOA=GNBdOfs|kQR%VJUXO!(rB{~0-ok>-Du3e&b;F$`o(vgy zxpUr*+t7aAk#k^bkuO-@V=Ab8s8YUK-DV2W(fhUc=_6~?SJOTAja7g>uVQ7PD6=Wk$Qo(!>yIvTaXvZPN&?HIo+X;tYVRdt@-n%jd8q&`M=8PL>LXU#ButM@3QoiKn$v7hyDumE+kWYNeKUXmDR* zXx4F&rE$f0F#jA{i%xCXZWP1&JreJ!uidl)apa>Ks_P!_L_s5yYG=OL;#Ge95hQ@e zF4ppHpPrAmW?@VqJaZ~E|2}Ja`$n&h4X$ZaD!Jwh`}yJLF4-}+hJ7F9=S@+ffy!UD9~SW+boz`_8I&db|E?hLINOP3OE0ey7`D%n`1#ix|~K4*GDp<)$p)5~=m3El+8#YEZL12tT!!{l1}#jRq} zytLnELIO!87|YojSJ^@2y3@=F#K<#YxPQA}n2F|i4FUYp<3Cy{aiqSEz3;TZyd0j& z*N3R1yvUSd?a_}=U9p%E7bHC`;UE{;Zd_J*((XLvt{z6!%H9u!OA&f;ZIhbS*UsMb zP|rXga!s7duj4Sgvip_r6s@~Wnrl1A>9t;2UHjCQ0a8Vo=|^9fD3?~7;*q$SzVO97 zT6ky*F(J@0?x?gD3X(?;9Nu$+~{(7yhO6%+XM*n2nbV}BqD!UfTEJ?g_I8>+mguf8G z9wef^Td%l(Z%f0&;#*NGzi2W!=Y?O;9$;QZ`K^eUUO&bO5t{~mZ-CuftAKdmnsR!u zeJk54X2ZH8!nFk~*VGLzKZT3%&S1L`5Wt%?$_SAvHhx>`y7gXFP0Fh6t~cIVk)(~< zGjj=}1M^i#Bxy%FU@L&t!D3o{83Lp{J%nxx?*SbD+k>5$`+ zqjTJ}bmsui)mMrt7Eiuoe&5jC#xT~zA?K+N*tj$93t%qC$H1(lR z`~Gdd;1xfm><=bAvGW9;>o~GG(8WVN9^jHZjv;=~l?-{e?cn@M zhcjMsT%WHiivTPY1!dF8oHvf zIKMfHt**a?PB(Nx@>`tKwOd`ej)c~+Mqan%kDlgeu@r2}eu^EcO9DMGtuq4lqv3t6 zy&=Esm^n}be==hC8=gjamv?C$NBNmMo?hTC5<7GWR6Ur7&1)%Vjw<^k7E#B7yPePG zF(T4`9M-!7MF7NjF()t|VRY~~%!? 
zJn)@)FnhCC!jm?uUS}(9N>>@rcDd33XP8|1)m;ncEy0irdPt_Fh6rg{S3qj^zFK4h z&|7}QfblR)2~Z03Ljkmt1tc97&O72;tfpEs*>YF17ujjwfSZvR&fA*p7{k*3CX;io z%Rnjp;Rx?ifHZ_x_xaI_?u38{7kEDLUgr1ep>{aZcUQ4oS`60)Db$NJPgcE?D#*L< zl9FQMy<-hg6m4gfCgx28+6wxcNLILQ51#PxUUO$`kl3`A$pB9D8zQ5)soSSS%H|xQ z=$Zd1L>UBxZzZ3b6o1|aDtdw;%|YGlUc-ciQ|MqVIBe`r%Mlv5V*3WMucc3R@) zr%u&x;3EO@x!b|VI3K2PDPu$TnJ~KFL&!$pI^9>;k}$d5`5Po49~V511xL7^DQ4Zs z`acoG==6q1a%rQ1mb|2$@CHH0ttInZ>kfy>-)Z_vl0@_ebysmS+BX%q6F2|42gIMQXf6g&xMN^dch_%J2UbLh z^Mjo_lG9%hdF3PM{9gKLttr=z@H^JmVhvJ7;AX2{S0bh~?j=(Tl=KjnB#<-+eSrJ$ zj+Fc4=39qt44vhHdTd91@*e@*WPF=e-Y2clr_m84;LLXEBTrF99L*QL^cMTl}0+EOm zTb#CXDSDa2b*b-AEx0EUl0AP>Wv+$LFZu*bzi$f?-9L#>Pe#u>}?O_Un zezX@S^dOa&V!uUbDNED;4Sbx(x76@y#uaeUSmO|{^_c*AWt7NS; zL%zH-EZ^bPibR)$)j`vWi=p?`z^`;PGACvW{A_sagDNW!4Oy|@S+eZQ#q2GKicsT- zx>t(<{L=k0H%Ci!TRJMDvQztBd*>w-_)?{MEt79}f93!&e*N*^LBQv&0ejaYc3gt}r z%3ZHI++7897M#ei_K9+l(VRHQ4NX~$4O6|A6L&=mFFA?R{JJJPZ;0q`D?$wF?R;*M znW|lWSm2xXAv!E1wcDv~J!EQh+BMnN$cly9K_14WMWC1PHL7}CaF2B+_|vfqcJ0u_ ziGr)+%j#vlt8>KbfbO*)HW1s$roSfVO)|Ro8sUH}6$s{fb7IWf;$kz%NdvrKxDSRtSXhhqwr&a%elJJr4`t6fYZNAQHS6 z2c{8VFD@2e?jV2nhI1OKk?}Yxs{d^XLKy14XkhED%muX=N?Y z7yGQX=0g%OQ!v%8_GNeIs1m>}SC-y}7nIkJ*gZ?;JP7T0@LNIJx~Z7BcJ>hvVuSTI zMtz`jC*F*p=_=WNpl7|7PYKtw9(NNlwfjC|$rWb3E&0oHLyyVn8DLmgr5u5Oi@y+2 z>*QK{_(!B!rVKE63`TkTKQ!G&ofY>~F)%xLM4T;Kt>;== z5qwW0ttTDN{K9&(@Q5m#8Qyl08^_uE3CNiM>ujyKQE~BE@83{7V=4fvOc;fv?e}mm zcn=+7cT4E>@HQmQ6c8i1T41wVwsOEbwzahiE=f7WToUw;^u-(Iz#%K9G^Z z;{OKZBa_cNJ0K@31$g!~GY9F#Jnr6$3F)b0Duc4D5CERrIMg3Fv;=%fI$c; zpe;PnwES`J9RUc64tjBwLbaFg0-s=tKIJY!;6dj11q9<1SIU5}To|2Jz}x9dK*(yB zQ)qul;?0 z#_k(VT!+#X#dND#^(%_CJr3O-qa(&VWKzogTzA+W3swOjMexlhq8mOUfu8ODT%b$d zNbJ_;gYSxBR6r`EF+A6yLp2ECxiK5FD;(WC?yQwdb@<|ER%O1hS<2?DN23>_d)X7= z;uqmnGl$gg*`i++uBk`w$u0nNGWI9u>@DFyjRI_cr4nLm(PHZV`au~?wA>q?rEKyl zWKgW-eXzeGl||?yO@!%m|Ibi9<0Je2zO53I-s`RRsQQ9I{mtDPLgH+o zljlHd9e7TgD+XIqtslt|GmW4rPKvj(u=?K=^1`e>L46HEEqZK$LdSk)#%;lPEYPrP z;~#jgj!f^VXM^i9)rDcHH> 
z@Vp}}yjNz*QRxiXz;h*Ta&+;iv_n}b4#Hg!^)sYuv-5$;+t(|xc1aS1I+?~QBOW_G zQaHHCw7=ft)WYxX_^1Zqg1!?{ZYHO{X(Ikv!`L{5zUj$&vFMjOwfbo9MYiIM)^g?d zE_XJ(B3Fb1?ScX>E%R4L&Nia<=??gKFRmmUK|d)y&fl0j$ahxV1f6VVF;I9BP95IUp`(a6w28* zsUI&V^4w22VBDBNUVAhe8sidU^_1im{&V}TH{cg8Y!=g7jODHVakKR9rvLB(J)iOmyRWzpZ&lxkw@fU*n5JYtAeQB{I^OD)4+fPlh?I=t1p? z6I~3XcF6VB6WcnpSvn+ZY56HaF+bNB2u?Okt(C+OYfox7fm_X2tDEru$mG3gIz(~X z@AI?t86F)Wa{qIHGGn_;ShlTk&m_NeoD1Ga<6e^M(nVmj)}YWZ;a)r2T!Jv}qpS4Z zc|O)AX>-q@54+C$!%r!potEwNshKIfoT;PMG#rEiTWgQGO7mlh-x%R3vAu;%n%DW* z;flqIqyFA*i}%(Ki}4nPr5Ar92}|--8X}7@hZ z#wqSS0fn@clpw&jqM=H-O`n0yM8T#s0=5n9qYliaLt}31AJhL%iW$`HNwGz6(2PgO zQ_=ggK%b0m~@}^PMOv-|?UQ{IpIXZc9Hn%hOfS3?!KN zMCW~b<#B%O`a$NFMg>L%C4E*8N6>N(dbs1`Cq_5c|8Bxx2J22BIe42kje1ZXw_nAL zI-;Wj5&TMSyzz9c?xWL~r)d3(1%j;7Zk%=t+AJI1Fimo2TzE9A$;K;gOBV37rCvU7 zjF(x^awFwW)SFTG=E+m6IQ9><5-2wGZEapJH6=8RAfy>*@v!JH;8U-d)rC^L1Mv_@ z|H|VrJ!kr7?Ku#6Doayyctz8)5~s- zL1YVO#9~xlgRGzW3^T2C94`FN{BJ!|yE8(l3=$$!Y~9?00cNOQt^K5Gmn*EBPPg>Z z18|%Q*Xyf7SZqxL1F)#536H5)QJV#whFU~hS?M+^Oz)FBq<$$Tv1$^Rh(-QbxgjKE zSW;_#JDq8garCtmVHtu!8&ZG~HYgJ$e?p85!%s|(%}&0HXxXeirD%tu8F4#eE^)6X z4aXcrM1qS}>pW(fC>%v!(_7MpNUmjz6Y~0!;B`mky?Yl+Ueu3ZpDOjish3U7%zJJu z`Xb?NO$i7;I$_d`i>O^j3z6;4Yg+qmn9U&^s^|^NSt=!z8iT)rTmZL3avC| zHP=D=r5lC`4#>ZZ$B8=Jv&& zp4NI8v5xQRux*<)_OL-P-M+`JbbQ{`MysP%kU=f@BX%M6rghbQzyEY1K5R6~&z^eD z>2@XzLOGBF;EXc$xla&d(kv(gJ^?G7)p(J!|oh_ z?mgH{PS+E|gKqNM2iF3e9I2sIuS8gQnZ@c>HHi*VRZ%GRDcB6UAPPh!%+e|luH6c4 zM^C8M$%LmbC&S7f7G>49GF)P=g=Hx68MV4Hje$JRCxoMIO^#E!3`=rzd-88Ef7fx{ zc1x?H#KrtxnLAjg<-E$hD2c)exl38s`|+4zsHA#s1F=;c)|l_8ME=y!;cxq;^4Wy` zn1Bp}OzdXAGINO~(p^#ahi{K ztfK0S>~-Xg=4!U{wA{%g_Z5b6@l)q>W!Cbi70XBE z#SH(QrDDoZq}a*hJS?@Z(uxsoa+7iKS{q$?eAQ%Tir!%A?^moSv?j^p)0e~S;^if* zR5=fnKCw^Ku*CXgD5kIK$AU1r>?9^EIQY4NNF_T9)(S2NY=}L+;{FiAe!wZ4_DXb& zT}WQJlW!$K=F3D_>l`65(sDPW3QqC-OhnIiwXrkx?}#^z%vM7$OyKAr-6<}X-KK}N zC4R|6I(x0{;%;ej&i88qdxtwr?zIee)1NaCW@R2hq%`vP9x(6}jkh3?EJVjR&`l{- zzzY&ZN>NIE-uKUl{`>tg26v;S6fb4j@G=>RI%adx@dSIdj%R`*DKquGvlCCrim(El 
z9p-Jccz-;5`Mc1kY#(=i29vTeZK8#$;;=w!Qe)YC!Z-@9@JZ{iOHQpG#YLJ`6-emT zgPeYEY}{XHJj+Uq#oa(8eLXTQSBegQMbdL(&~C(NKy)k)T? z#gXE*aJ;8cX>M?n6G_LRvFCSSm_gp?)4^q4SCcdfT`RzBJuP<4!Mnkgi3!!brB^HF zs^Bd8Fc9^;u3%sB`BTcw#xO;Dr^l}^!tn{C1htns0~r>wWr%R~hEu@z%7HE~F#0!$2?v&5=AxS3ce6a$*^T6)Pf;2>Dv1)}V;(cpc z9SJm8v48l*IwXTvOsYTb*NTefZ=?pMvR<((%u=FYo+|O}mn+*zh=MoIQSd6k+0yek zAS)xKo`~B*fnMqmha%^5p3NZmh7V4ARk`Rfb|YCp za(u?_8~*x&$&`rv)o7 z{pCsN%pqAQ3#@k;6SVbone|5=rZQ76QTlDniU*S!Z^EXG;cKPXJlCw2F3cr@+~h4e zJ@d!)2bA52F=s(i5^_bGa4xVg^HJCX;TJB!rLd( z+@5cel_1_>n9tlS9Q_V>gnB--Q)7}aSH+lM+p%@Zy@1YfkRLz2)hU6^zZcK~I(tb$ zcH{7x^RCzn=(tP79>b5$CS=uXcb}40@cjyMXQms7SOa(d&t>{m2Ld`qb6>u)c7$La zW|g3`Cs&{J3d?$O)nPm>Xx>W`=nJw6!vZUOEUpBa4gs49LKc4w|GDFY>}`WNRp?p( zAxK;WbnFRLh_VknUso#Xo#k4-e4qdWbcuEz?cAm%l?Ld#!6n-$JQ_}hIb6?JESV~?Nljez>C+dgxu1GD zbqIdTt@4~b4|z-8T?0s^mN}E8jeXXG3a@?kirK{9m20tHJoY@hLFMapUrl@i29!y2 z?xnyS${vhx0{`qGLc88SiWL{z$*A$ty;1;T7Z9j?c2JqDQ*zI#`b^WAgBZ zxSkIt3`3hGU|ppukQ@%r7o-b`Yp(_(tcqLjfk@L&Za{j9igKnlEw9b929&*tu#hMT z&{HxwE5H<84Jtub?spw@_ZWyBBD%glZ{04Vyjc7iyR}#aAKU@#&Ws$u;wX*<<1yYg zJrDTLWCVmGf@Zw@;%?+ZS(N$+T{N@0Nx!1~;ru>)_i`BE-D|SU9y!bb67@(9rC>2>qY&T`}zD)QBgQ%9;0$j~z*QisIC3JZ` zKHLEKExXL&oe7Q~zzs~Jur|iQA#9-S68||cp^tMKnyWgufi z6?mNJLyRXHs%HNtfUPMxS^Ye0r=G>!+dH%eZH$YDMl>%xF!z>9^86Qe`d3^-bNpY5 zKQowU-(dtqDq(GER;1v!M>`(BHa=(9f%ICpCvEJ?Ts;N~KxUgz-}gozH=)2?v<#TQ zt2Ihb{WnAHL?!MjAnsD4OoRjmsU2!D-WrVC?}cO^y6&_9(d^34xQ8ZoRffiPQ`S2V zkSwcOjjk_#NltwStPlNk;BphkNyH|>$XPOjl!9{|8dRGZ1~AKK-Jcdb+xEy~IQl2* zVBEdjv=zURj;`L;yY|*0&>KmPG=l-Q6qxoylM=y+w7l|j3Vs(G2@z~412kD< z6z(kkKyh3-4nm$a>Qj42iK6!mAnqT!X72rjyZDNwEFjTVknPRj2&;NO#F=i%Q#&6o zNF8r@B&4Cy&gZ+@m(UrTM0Zb4E6 z)GB%_r!#7Bq_Q`&rkJbxTL_^SQBCv#Tb&1az)Zv6Bp{4TEIu>5L*%xS_l4n1d8SK> zr?s0PDkpj-n8p#WJ($(t;(P%-eR}pW!O5s3dpndhUqdrWN@e$P(e?^Q`NPXQzSaub zSr&iMqm)iVUQw#9_*j{E$CZ06HuqxtaU(wSVuZfUyG4&Ar2bNBs_1Y!h z-;KRGyURZl{up26#0*%I>1Oc?P@K*6@=>0Y=w)} z`{`Q=QL-WXMgd-?0=97lbO%9_JDQ|Q=-kzsw(VdNP`7+@JqHJ+X_}^Lg%`-lO*AHE 
zCHc(x2ZbAv;c{{8l+d$ZVAz-8ZSkl@{-X_9E_-^%OhCQNfl*G4cYfp7uI)Me&4~&f zy9^UsjTz|%#+c+kH_=~Kq+MW1A_9X5I^#~LTS}H3eKq~0`S}V4Ga9GZWFVW!e@;i) z$oI@(X8p2HY~2(Kr8^3DOKH)RGCZwBDIqj|k_!`t-c>$v*rZGBG!qx}~I?SO$e zqVGHY9q?f&sh=j}qVdP>OvL=Lk`D&Ds-KdE+Wwb;Z5xrcZvO%stM0L6lTysdRkg~w z(4qFZ%7tbi+wgke915$90vOyx3TH?xX>#PDLb&079-5XM|D=>EEP#RRkv<(@K8snuOpJ9I=zT{Sp<&21*~7W(YNpp=_g1WRKG zm*N9Z&R3%-JVb!zbaLdQ3YVUy8|E)uC z7@<_fCO{VC=VT)BH*O*|E=4hJc`~nMCS=fj1MzsrLqr)2N!4p;=t^k(lc&!P(c(D{ z^g*Maxb>zOxwZ>6xTAH=X^b^C1yo76R39f$apRBbP#I&6%5IbBx%@lfIm9)BWc*CA zrTv??#W|2O`KewmkIc{ibMQ?LnTsT?@>3d0%F|EFlb5L&ZPR8ty-*m&)Ha?Fchy&N zJlSA^T&-9WD(=S2w)1W?4VY|bxuqmZph=dQ)zc~S&%rmgQ-Q*1mL)A45nR`HXMsK# zFCpYrx3A|~i|5BJgGu-;1As057uE@%YsdbajclPj4JzL1(*kR1`WnArc_tgdeEd1W zVIsZ0?=pb%qqF~%&u?zBaam=)o{NB0Vy7yOREsx!bswCz-hJYc?vbW?eTe~Hf_TNQ8P98x zePeRo+@P-G%{`~eV6C{eYitoaC80 z`1K(b@{{mJ2;n8;5aWZ37xshKb>Zy0jHjI=bGwADWyiOt8tbUPS{j{k%34#jj>dy> z&tyN-`gJU32TzWKeo-&JOWFF=JLfTuqC9Z8c8HrhDnDjA7!%C`9GU?lI3iEFNw8hQ zQcgav_oYyhq2;ZKRkdacoXauAz`xoAPud=8Yv%Ry!LZymZR6D2VY$5CCfurDx`N9E z3P|r~U$CD?@9t>SO}GrsBwmj<)I-F!@k6m;Bc@T7-KDuJ)moO45HUr_gI@5kLY|xQ z3*Wl6e<8+?H-WV^lfmy>??Nee>HQ=svg%934sQ)6+|QN29Hg|u6Sk*#wi;hYo$Qdg zIH+@`8rsjiowcTJCF$rcuiPk*-9SucD{CRXj9G$jcqPTQWruUw@TYE@63@vbXLoU2 z+#OdWIk&35*F_G`wHOcPIy?Spes9$i&|Y^jYA|sXC+4bO^V)<&((#W*Q;Jocf3p=L z+Sz_c2Jg>H=iiT=R_dFO51@qa9#ihg1af{kIyn7avIDM6gP_zTP76Q8_7vm)GYz(9 z=HjrD9>$2|73`G6VWeLE$W?q}Nm%oE2*M&pC+Ylvnjzr-(R7tjQMKWg?#`jR6omol zlvY|=L1Bgl>F!ZLVulbUhg9j527!?nkQ8Y|Y9yq);~u_y?^@?Cti$1bYVZB-hpH|9 z^R3vPxB&Nlx#=$sJ}(w9vqb#|#K*iBe2>Z0X`s&}Y41ggWUd)J_F7kLn%O1SEe7ve|C(V0{o56?>ve}_3%2Vmp@WBd(#DDG;Rr34veCAXr0W%&? 
z3|!sYwxtc=z;PY-jo680*F=aU`miN_B0#oykve}1Zp%AY#7|H?{laCcI#u4X$4yg2 zPERfpv6KBQF19_6?EXVF2##GfUA6DxmGt=dbqKKw2V8QN!Pz5Bv>JCN7k~9TLvnV+ zAUnqo>Bf}Yf#itJ5c1)wIJHpKsD}$*JiRvwja8>tsXEoi{r5ixx+Z_4+|mm$+6yk` z3iA2x;(sW7v88D6Go&bQH=c_cs`+Wk&+oCz!rL3uG6J-4LQbf_qh_=|%n1(eb8 zIAf)cRv^(~0X0&#>=ewD!D+v&W&am*W<-_v8I)dK5Z1k3QEncpH6J@gFT4 zj32QCh2aw0TD?8td?mz1aewzf$m139rlj`ZO|LZO%KG5X?y&@a!BzjqJ~}7Hep)vc zVoQ8ve9qhQ4m*wj40o+OlyVQ8U`Po|!RvA(}G|svf*vKjqe4l=Clpbo`>x8e$gARFy zlEsa9jQ}&!RqKs3hgrjCl7mQ7bX3};@^{n3bnr@1%jokD&8xiz`qn%QYQdt=u}C8l zZKkzc9h#`>gBWqOVQ<_Gi%n7Vc9ip3f_ecor`HGR3?|MHc?sJP^8aFfI1XCXD6|1E zX{JX0mrAKwFhZCeGcuRZYUF1Ac;Rw%`tvb~vC<1E4MWSSC+zc%`xk$X*AN%nm$-xl zvv>yC|K5AKK4gh{bp^&7<>2MNyP`&*~!!VnIu(Uv58#Al~yvaV9 zL?fjHm3mdgMOC!a+3lbDG!w4l8^@Q-+mB5@?!T_{3pdth<6e>2@;+iFBtVa!Gczn3 z+v-ilaP_JcWqCW=UE#}kR}j;RZ=2%-y3JVqSu8is-Y687=()cv!K zJu%DJBw_))lGH-s!SmqZG`^<4A=%;vT@E~tr*^s1HghU}-ybu#J~1lY&#UHM(ntzV zLR+qr+g^w{yggh{p>*|ae%KRJrmru9R)CJ~dgY+fHGVWwy-kYujnOv1snU64LR7Bm%nY{8XcdCs@D!47;b=``z-U4PpEAsj$tG7<9B!$`f z{w+V3QJNc1u4Ze3sT6?){Bcxpk>R(!YYWsau@bJZ;a;YYG|Ovt$$pG)Uhp;@T7k9| zBJ<{8SAmi{Vt`HK84$Ow6$7EiP5W&wSmQJ(n z^nN?j?#V=Cox7x@64+1y_dLzRNn#|m zWZ`{CEw+KP&KX&8Y8o^l?OV;!>k8Io8!G!LzNe%8FCd(m4VrnfE@$1Gstrs@damS)_5c#p%INtAwx{)n18oXd4i9JEp<@P$}_ zn9Y$xNX;3OLxpSLrc}80rBv7!sqbXqcwE){(mhurG+|Q>o=ray&Q5$qN{pC?UCcZe z`nfDUY!*VsN^wQORJ0SPxFEFqeGS*`akQXPe&zzDCUzRZFh}H0Z!~TzKAO$K)_}Mf zw2hGQkT3OiG=9}Wd!f@7n{_UmV5A5Rl#uH7E7ciPaG-{y{idya67mk>oAQtocta{C zrO9Tc&sAZ!!mMC^U$+OJOTKEADnEsQJ1-M)4MtwvTa_5rXOkJ3nMNz*O@-6`x+hK9 zl(~G#b!VB;n|zYp!@54t^To+p4*HKcL61iLDrjh(!!&Ay8~wlDb=s~3U03Ls(a7sa;)x8u}5ErPQ|%Q*TD``v3;$Yv5yN!*g&-c zzm(%DE`MixSWS2f>I3*tya$H8*GXqE+A^ajfF++C@m1#CqviG*bNq3sXYK#6QT35h z*$gVW?(Y0CUmSNdItvhFrdX*@=(MTL3ryaos4c-YG~_y*Jlb*Taora%w^4D*`bEqt zP(Ym_@`NTTAPJ2I^7L3X`HB>@(3DMu1a^veEB!ZD@tMx zNeG*<;~zbwiurgTUNzf!=5Uc3fwC#<_PF3bK*7wKki+5kz+*TJb!F8%sc>^khzjlF zU97AOPn?<7>4Ek=ZYNX3XU8%ng`b-~aY_53S!@_of=V}d)*(2r{HDR5*W88Q_GTu= zuum&r=bJFI^W@bU}=+NH|#?>LHqfxT`&CsIwJGu 
zpphi65p7xcodh|Zt%#nJjdMl+^^SgqxUQ2uP^4*;VNYk;&MqJK71r=PHg@6W8MPwG z6ye)GYxY9kqvd%x>iYKm|CUkAcy?M#Lio{HxKV+nQDgoH#Ok*0qzM8_=uS>61i{)1 zK=EEo`3hgVaM$$k6h$5tT+kc_r^nWz2@+_vKV_!o&?QUFk4oQiEaOE%GKehkuH;4S z@v4be#y@^u_U>%6WJAHVzSUc$v$s}lPSC-8_MdjATa;9Jj!-dA$@|d;Q5N!R4fXl|Ctg1HsW2zr{#}VL{3H{gHj}^4 znSN%J_}YXsLGtkD;#Fs{DU{2)lmP06!+uZkkpQ@g#~Z^TDXj#Wn0tv`K4%U8FEZ)C z_2j+`X}+_$pd~=XTvIVO+e25Do{`a}|w zqgPslF!p_EfZ*8l>9Q5MNC44D+3(+F~WrqU`*{S&pC7 zg~Q`wReF@{6a!wo`7s1KHIH^k(F!IfWjYnk5={7D8S|bgpZ{xHpP6-gHu%;-%M#cP zR3s&9Xf!2gYd>wc{Cq4QZztO7Rsoy7=lav8*39A@$qvuAeJ2TCvyW25ET@A82N$;l z4SB4X*!k%kkKvV(4qM48Q`s&k2 z*WbuNrxE2Iri#yRJt{qgok)7!MTe_0IitA)Wg&w{9}ch2ae%w2r*^ed?~($^ir3c* z{_E@kdt0!mJ@q)zo+I(*EvdwExN+wkmmb$u>#p)h!zF$7WcjrE6JNJ}KxO0k>rir! zjdxsX9Mmd#&*1*A#p#xXhbq+)65xW6T7!}6jnaJzn&$K1sc=gCRMnB|fdYlRK(E0L zaStBngeuxeZyh!zvjihM^rz)xjZWvh=gj|0w*h_+jK$IZ2eR`_KfSN-IqrG}E@zqv zIIu4o8+RD+-Ml#|S4C8{0RxMtEH{6VeZ~hly|Q#E$RMfAR627k>fKqZXFJnfalxel zRn&TpWKA!Kc>jEfRshRXeBh=5FSmjTEKKYq`zs6|W*X>^{++`|Dj)iat47ebCb((v zsK)PH_RBd|jpSm+V{6vM12m_?Pc_?CGbnuK99G)igG?7~&m3nxsxKWsXa)Ik8|zC?Thp3Rv^kx>*&)Db^=T($_8Nuh{&(~r$A3s)Wj%Ex{6r?`O)$37fJ`qx zDiFW>JxQ70RxPh#NL0PjONX(yBHU=(31yDx9XnN-J*rO;nh{ei31+P~=j=h~^N9If z2mdZaaA{u~YCBTbF4&7!jI)Bq_XaNK6k8HnIV(|~5wzU`tYVQ~8rgf<&2;MexJmK# zBM-L_#{|p=*nfeDR)g$Xx|WWv|9ouDdadvwNju7< z+n{JQ?}?V9LS3N2YG^6nx2*bUEFWE>eX)P&D8;7Us-kTaJ2(~#%W;x2br zl0fiPhdKz^Z-k{ehOk*;G2!&mtr)}BxY2_VUY_dW>;fv_ps-4}K4Z^e-tx#3w zt@D;Fq#c2mWy+nEKF77L81#&bI!rxdIk<{tZS9`t>)wB504}VD0A<{x`oEYg0q10R(Jx-e3x z;Z!e(IJTEWOtexMgl_Ple2bNaXWs&hg*<bK zKlOR7H=4UFZIO-(vV{kxUhxacF|Xz3BU?sDfq5$eBG(jIZ$yA$>;|LmK)q-C zoDIZ38etu2)hBCzrOqp}o}A%fLW&pK$;yv6LPR9&y3>W3>Dl_4IOCm0TD6Qe{T3B% z$1!z`nl$cnM#i^ZPv@~KoqVYjiZk~OUvaMnElySYgkV|XY|(&fxqcak&!i9hs#9P* zdn7B?>-qFM0`N`+gV73hq?i*41~@3CzsW(~`HYDE;lmGtsbbY?>V*0Bss5yv-gsU= z=80aP>6G?`i$b?D;70(h&h-?8(1m0pg&9ZZJ@-$AP!)Kz;@kx&N$E#!VQl=k2w+B!ImbUPOIkp`|^>)T&>vBh_!%+7wsWHCQCJA$9})Xk&bE7?d7&; z-V9(m!`+iQQkZ=BjUH@|Hz67~_~zp$M*-Y%>UD+LnARtaU^imUNHy*tF;MH?_0tUp 
z!HG&ADFI1mwe@$*-ocX3Sd}pM><+OnhTX@t$Y?Cep0bw+>R}I)A#(|)8UA!h3@BSmK?j-s zx=Y_Rlo$MF`60cA)X?xm9X1gm>p!5;a=wV)BOEK`$|*&$>#tLprg2^hMr%}7LWsuq3B|8`tZ@&E`wbo;WjNPB!tro|KEW*zFn1sSgc9Jv_F# zl+4K&i%*<=ntcLm_iboJ-&4X3J}Z(l3Bbe-FW~DUU{d3hGm28M(v)Iz+zp}UwQW+I z9U1BuNe1_~$v}nQd#(rWO!J2|B&gWxrGIjCougfTG&eZqiG&1_w=P(}oSkZNcvglh z7~{ua;HsvRy?~LK0ui}F;ms`O{A;pp`^*=u(Fg6ndBzfC+Y>eOuM-S*p!~*DXBO{c zep#;s0k7Ik6s`9-^p>FH^4iFos2{=pZ13A!7*K~7@^(LsB{hs+;#d8za=)`}I&_|; zkmz%w^}nV^pL&xBUD`n8e@kf&Td%mgLga8?s3nBbNaHji+cQqrwddeuKg+NBtxz2U zlmg?#`s;KXe#1>hR&&gLV} z7j0x{7oy2-f%6ABr#WNWV_YP3R1KZ)1`i_Cmi#q-%=?|Q$AkYevCCBI0PH}e1uUaA z!6M0NXsI0jSYS$j*L(imnPF1bhjiGB{{2O^Jb~ro4c9=5ns{xSY=2A+nbXX zF`usGtD|lzFneTZJ_*P1Ltq_06?>+R)obvh<7m zmf2h0SV3~rO2v9l{lOq#$sMQTQ-2Z)_3I!9gPhetWS`E-6lw`YZ0u4saaUwYQK8h( zRjgDzb|IP6_K1281(WxJ?VZ2E`yj4H5|FwF5VFVE(KsE6ASr5TGHDVJhfJeUcmmEt z3G@|Mom1C3RG_}Kq?}fN@E8RINFdffVBZtykmp0V(}%YUzs)iBi!)pKG6_bH+yJ>a zB9?mpwdUxhDTJ6h9LXDZcDx~sWIb#}Dy}hWV})Z$_Wy^D68II=#29_zUefzOR6sGEuaGJJNIYCHwezm~MD_ExXvBAhgvvt+;clVGp7Q8CNW&-bK^a=Vv7RSPt3X!% z_e0R<*85Slr1PHTJCA+G(Q*>`n4N`(+D&d2-RZT*A_{R;>9l;aX;`bO1iIebW9&u# z!lO$9DQaAbqQkxOMm_1)@*tVClxRbg1xlY8$$@o6Gfm5M^@zry{lf+kNQ6yVikbLL zQK#5y4&M{Ho_*>i2Nmsaa=Wu!na!&YY>?qB0adYqSxaEd)uZ*6%!BI}lCL=T>#lL2 zsT!w@L(Fa#Eaz#XK`E+tjo8f=_Mqx?B;|+NHWCT7wPybvHGKC?e6%YGYcdpJx+iFZ zJtL81=xW7c`5X2G>#&#yU+@-O#FxBuM|Pm>0Vc;R^HoG*wC@+-(tpkqRCsH` zTNn?FjXT^=+<5oZ$Jti~3aB3_E~c;|an-skq1XD5 z943WEQ(cIc3wU{k%Wv@fP-r?ii1{=E{;_<1)6Pi*!@k+B==-oSIod#f<4PEEycDX2 zlnK6u-Dg0)zCLI1+dQ#X3yBFtoeh@}V`}u!gX`?R%)v7fm+)Wvg`;kNM_hF@{ zBHFc3tNge;piF0Q_W;q?+|WIF#55(b)6yrCeX{h_SZcg@ci(U^+GokO^Or_h)_j5y z`>VXRWg8e+sLJNL2JuDX3(@BuH&NM&n`nV5kk#kY%!+o6%D=$`jrPBZFPLJFyADlz{>c52O-+N-GyM1IED>qMq)a<75_x|0;pY1`(DF1x z&o}^&WS2GU3h^Ji(z3R)4&=S5AzG2-r^J4!0pSgQ=$TCiciO+`N{*e=&=Q^&JN0gl zwFA{WwD~^GT|<&q6o+(pg1P#QdXelc^G}-LkDO%7)B^r%zwODz5CP|q3LJ5&c_atv z71&Ui6P8R>5}$SO>uF7bSmqZ7vEdi?vg$NEYTrb+?dEVjn4QCjR)S6nWiUDFun7dX 
zJ^9YXjP9}j=;3Wrv2F~LL4vQ{L9F*5)w{B?A@KK(L(w2Z!?lnHFgJuc_LQ>Y5z#g#hmz5TuH93as(7w`B)ncH z(8$dLDPv&y@9ZDM1irNTw8i;cs}*!Qem>L4RH!x*$EApUlMO{I^`#eVxZ-cADlFhq zavJ@iD#Ps+h~Wdqe<8b<{ z7WiXwvWZ#1ZCbv%y{tteOK63JvGkE-mX#wI_-%6166OiFb9i|{?;U2>V$Dry`iVXg z5Ho}6h*e+z>>fdjvY+K`o>gz=j(v*ODtgYU?uW@KC1UnB`d_fmu+G%7OXXB_`{E`C z51G*ZmKjXhqSwRAPrzAoLRIwMZL9}y>gp;(y=tXM3ha|dEu$b-y);(0BVr7(UpPEi zX-=BH=E=NxTt-4+`gjUn;kdBrdQXFSt?>zsI%`e1%cg|F2Pvu|x|iyO_HuW)Xj2B7 z1u~T~xyT!N%#LKIe{B-g5}KIcQd{pJKCEPQ*S!(YcSlO@Rs?MJ9j;&6U8B->^EIwc z#fRG(J#WIs$(HW8SoJ-{Y9X3iW;ggXg9m=gjYM#wH=Bp!VhCZiQnf?MVdIeRKS*}4`G zcyM6>Ks5#(kd(evq_FBu#WvlLHN~?RvPSeI?dij{?p$}5RgSjCv!sXrma7~Cw4^3l zRS5wKel))}xx=b%ncdgZlmU48txz3)ljW!u+*A8eQ}>3F2VE!I$N+!?^_2NLu80ak zPu%k$AIM}e&L0BRv^v=&{+RO{3(5%z^k1!whzXHwu_}hzmjc_b=fv&a3;q%#P;kE? z2f=*IO&`nD^9kzZdimMvRkLnqWZkWwMA|pu{BCl1{u%16a=uh2QN^@Yq}+xm(IH3^ z9e%NI%2dX%9Jst8PrwPCq&od{XOlHX^e(_i~<0Y%m-B4);RxFt{$pXP(>WZfF=V#hky8D$@z7j4}? zNvT;BTO`Q}BfcrFB;l}|mqL-{jV3bPT&mGX!SJVSI6Aw|c(kbqm>w7$Y#6z=>1C!0 zQLM1TSBpOYjrlK8KnZC@%$hK0v#L2%I{-K{A_`BFs-&B6L#v|Hm`aDGe@SAhu;$`& z?EF3-M*(WntqKZG+?wU4{7d3>mMlHr6~WF0p?v$Bd$9~t|Ge@0Z^vW(#CI|#q;Cz0 zT_ho_etOe!jg?3Uk^|?86N5Awn}(NXrHBpxXI;Xe&(qaRObwk^8O+V+Y3hC0!xQa-dOUe$xR6@1JU=hwCl0jyjWC`66nz=QTAXW*t-5r}6<6Ixmc zN19Xp|M%yPX*O{|bWC!CbNR2xJlW34ja!`e z&~DVw%Kql)`dmxC)xx>CfQ;^tTUXV~zpD!gpp<9UJ^|h7jZYH`8M`Nq`? zoI$rE2lr=agx%<|tM8&RZ&3CW z$VYK2Oy(}?|7p)9ShZq%P_3L&)XRa7jY{B@_W1UqX!%s`r1#Z68Bj8d#~M#l!^W6^ zQocQ4ZY)=~W!pQ8>SjRO?UU@4fD216Kk5aQyG%nqmw{_5KhnYkjPRae>-FB`WLR$c zz4i%p>0WDUXu%lA6|fGm6U+SKI#Bc=uJ*Pb)F>`FrH4Ko$T-8fc