rapidsai · rapids-bot · Sep 30, 2022 · Sep 7, 2022 · Sep 14, 2022 · Sep 14, 2022
@@ -45,3 +45,7 @@ sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cma
 for FILE in conda/environments/*.yml; do
   sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" ${FILE};
 done
+
+# Doxyfile update
+sed_runner "/PROJECT_NUMBER/ s|[0-9]\+.[0-9]\+|${NEXT_SHORT_TAG}|g" cpp/doxygen/Doxyfile
-sed_runner "/PROJECT_NUMBER/ s|[0-9]\+.[0-9]\+|${NEXT_SHORT_TAG}|g" cpp/doxygen/Doxyfile
+sed_runner "/PROJECT_NUMBER[ ]*=/ s|=.*|= ${NEXT_FULL_TAG}|g" cpp/doxygen/Doxyfile
-sed_runner "/PROJECT_NUMBER/ s|[0-9]\+.[0-9]\+|${NEXT_SHORT_TAG}|g" cpp/doxygen/Doxyfile
+sed_runner "/PROJECT_NUMBER[ ]*=/ s|=.*|= ${NEXT_FULL_TAG}|g" cpp/doxygen/Doxyfile
+sed_runner "/TAGFILES/ s|[0-9]\+.[0-9]\+|${NEXT_SHORT_TAG}|g" cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "libcuspatial"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 22.06.00
+PROJECT_NUMBER         = 22.10.00
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -726,7 +726,7 @@ FILE_VERSION_FILTER    =
 # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
 # tag is left empty.
 
-LAYOUT_FILE            =
+LAYOUT_FILE            = DoxygenLayout.xml
 
 # The CITE_BIB_FILES tag can be used to specify one or more bib files containing
 # the reference definitions. This must be a list of .bib files. The .bib
@@ -815,6 +815,11 @@ WARN_LOGFILE           =
 # Note: If this tag is empty the current directory is searched.
 
 INPUT                  = main_page.md \
+                         developer_guide/BENCHMARKING.md \
+                         developer_guide/DOCUMENTATION.md \
+                         developer_guide/DEVELOPER_GUIDE.md \
+                         developer_guide/TESTING.md \
+                         developer_guide/REFACTORING_GUIDE.md \
                          ../include
 
 # This tag can be used to specify the character encoding of the source files
@@ -950,7 +955,7 @@ INPUT_FILTER           =
 # need to set EXTENSION_MAPPING for the extension otherwise the files are not
 # properly processed by doxygen.
 
-FILTER_PATTERNS        =
+FILTER_PATTERNS        = *.md=./modify_fences.sh
 
 # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
 # INPUT_FILTER) will also be used to filter the input files that are used for
@@ -2166,7 +2171,7 @@ SKIP_FUNCTION_MACROS   = YES
 # the path). If a tag file is not located in the directory in which doxygen is
 # run, you must also specify the path to the tagfile here.
 
-TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/22.06 "libcudf.tag=https://docs.rapids.ai/api/libcudf/22.06"
+TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/22.10 "libcudf.tag=https://docs.rapids.ai/api/libcudf/22.10"
 
 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a
 # tag file that is based on the input files it reads. See section "Linking to

@@ -0,0 +1,195 @@
+<doxygenlayout version="1.0">
+  <!-- Generated by doxygen 1.8.13 -->
+  <!-- Navigation index tabs for HTML output -->
+  <navindex>
+    <tab type="mainpage" visible="yes" title=""/>
+    <tab type="pages" visible="yes" title="" intro=""/>
+    <tab type="user" url="@ref DEVELOPER_GUIDE" title="Developer Guide"/>
+    <tab type="modules" visible="yes" title="" intro=""/>
+    <tab type="namespaces" visible="yes" title="">
+      <tab type="namespacelist" visible="yes" title="" intro=""/>
+      <tab type="namespacemembers" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="classes" visible="yes" title="">
+      <tab type="classlist" visible="yes" title="" intro=""/>
+      <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="hierarchy" visible="yes" title="" intro=""/>
+      <tab type="classmembers" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="files" visible="yes" title="">
+      <tab type="filelist" visible="yes" title="" intro=""/>
+      <tab type="globals" visible="yes" title="" intro=""/>
+    </tab>
+    <tab type="examples" visible="yes" title="" intro=""/>  
+  </navindex>
+
+  <!-- Layout definition for a class page -->
+  <class>
+    <briefdescription visible="yes"/>
+    <includes visible="$SHOW_INCLUDE_FILES"/>
+    <inheritancegraph visible="$CLASS_GRAPH"/>
+    <collaborationgraph visible="$COLLABORATION_GRAPH"/>
+    <memberdecl>
+      <nestedclasses visible="yes" title=""/>
+      <publictypes title=""/>
+      <services title=""/>
+      <interfaces title=""/>
+      <publicslots title=""/>
+      <signals title=""/>
+      <publicmethods title=""/>
+      <publicstaticmethods title=""/>
+      <publicattributes title=""/>
+      <publicstaticattributes title=""/>
+      <protectedtypes title=""/>
+      <protectedslots title=""/>
+      <protectedmethods title=""/>
+      <protectedstaticmethods title=""/>
+      <protectedattributes title=""/>
+      <protectedstaticattributes title=""/>
+      <packagetypes title=""/>
+      <packagemethods title=""/>
+      <packagestaticmethods title=""/>
+      <packageattributes title=""/>
+      <packagestaticattributes title=""/>
+      <properties title=""/>
+      <events title=""/>
+      <privatetypes title=""/>
+      <privateslots title=""/>
+      <privatemethods title=""/>
+      <privatestaticmethods title=""/>
+      <privateattributes title=""/>
+      <privatestaticattributes title=""/>
+      <friends title=""/>
+      <related title="" subtitle=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <services title=""/>
+      <interfaces title=""/>
+      <constructors title=""/>
+      <functions title=""/>
+      <related title=""/>
+      <variables title=""/>
+      <properties title=""/>
+      <events title=""/>
+    </memberdef>
+    <allmemberslink visible="yes"/>
+    <usedfiles visible="$SHOW_USED_FILES"/>
+    <authorsection visible="yes"/>
+  </class>
+
+  <!-- Layout definition for a namespace page -->
+  <namespace>
+    <briefdescription visible="yes"/>
+    <memberdecl>
+      <nestednamespaces visible="yes" title=""/>
+      <constantgroups visible="yes" title=""/>
+      <classes visible="yes" title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+    </memberdef>
+    <authorsection visible="yes"/>
+  </namespace>
+
+  <!-- Layout definition for a file page -->
+  <file>
+    <briefdescription visible="yes"/>
+    <includes visible="$SHOW_INCLUDE_FILES"/>
+    <includegraph visible="$INCLUDE_GRAPH"/>
+    <includedbygraph visible="$INCLUDED_BY_GRAPH"/>
+    <sourcelink visible="yes"/>
+    <memberdecl>
+      <classes visible="yes" title=""/>
+      <namespaces visible="yes" title=""/>
+      <constantgroups visible="yes" title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <inlineclasses title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <functions title=""/>
+      <variables title=""/>
+    </memberdef>
+    <authorsection/>
+  </file>
+
+  <!-- Layout definition for a group page -->
+  <group>
+    <briefdescription visible="yes"/>
+    <groupgraph visible="$GROUP_GRAPHS"/>
+    <memberdecl>
+      <nestedgroups visible="yes" title=""/>
+      <dirs visible="yes" title=""/>
+      <files visible="yes" title=""/>
+      <namespaces visible="yes" title=""/>
+      <classes visible="yes" title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <enumvalues title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <signals title=""/>
+      <publicslots title=""/>
+      <protectedslots title=""/>
+      <privateslots title=""/>
+      <events title=""/>
+      <properties title=""/>
+      <friends title=""/>
+      <membergroups visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+    <memberdef>
+      <pagedocs/>
+      <inlineclasses title=""/>
+      <defines title=""/>
+      <typedefs title=""/>
+      <enums title=""/>
+      <enumvalues title=""/>
+      <functions title=""/>
+      <variables title=""/>
+      <signals title=""/>
+      <publicslots title=""/>
+      <protectedslots title=""/>
+      <privateslots title=""/>
+      <events title=""/>
+      <properties title=""/>
+      <friends title=""/>
+    </memberdef>
+    <authorsection visible="yes"/>
+  </group>
+
+  <!-- Layout definition for a directory page -->
+  <directory>
+    <briefdescription visible="yes"/>
+    <directorygraph visible="yes"/>
+    <memberdecl>
+      <dirs visible="yes"/>
+      <files visible="yes"/>
+    </memberdecl>
+    <detaileddescription title=""/>
+  </directory>
+</doxygenlayout>
@@ -0,0 +1,55 @@
+# Unit Benchmarking in libcuspatial
+
+Unit benchmarks in libcuspatial are written using [NVBench](https://github.com/NVIDIA/nvbench).
+While some existing benchmarks are written using
+[Google Benchmark](https://github.com/google/benchmark), new benchmarks should use NVBench.
+
+The NVBench library is similar to Google Benchmark, but has several quality of life improvements
+when doing GPU benchmarking such as displaying the fraction of peak memory bandwidth achieved and
+details about the GPU hardware.
+
+Both NVBench and Google Benchmark provide many options for specifying ranges of parameters to
+benchmark, as well as to control the time unit reported, among other options. Refer to existing
+benchmarks in `cpp/benchmarks` to understand the options.
+
+## Directory and File Naming
+
+The naming of unit benchmark directories and source files should be consistent with the feature
+being benchmarked. For example, the benchmarks for APIs in `point_in_polygon.hpp` should live in
+`cpp/benchmarks/point_in_polygon.cu`. Each feature (or set of related features) should have its own
+benchmark source file named `<feature>.{cu,cpp}`.
+
+In the interest of improving compile time, whenever possible, benchmark source files should be
+`.cpp` files because `nvcc` is slower than `gcc` in compiling host code. Note that
+`thrust::device_vector` includes device code, and so must only be used in `.cu` files.
+`rmm::device_uvector`, `rmm::device_buffer` and the various `column_wrapper` types described in
+[Testing](TESTING.md) can be used in `.cpp` files, and are therefore preferred in benchmark code
+over `thrust::device_vector`.
+
+Benchmarking header-only APIs requires CUDA compilation so should be done in `.cu` files.
+
+## CUDA Asynchrony and benchmark accuracy
+
+CUDA computations and operations like copies are typically asynchronous with respect to host code,
+so it is important to carefully synchronize in order to ensure the benchmark timing is not stopped
+before the feature you are benchmarking has completed. An RAII helper class `cuda_event_timer` is
+provided in `cpp/benchmarks/synchronization/synchronization.hpp` to help with this. This class
+can also optionally clear the GPU L2 cache in order to ensure cache hits do not artificially inflate
+performance in repeated iterations.
+
+## Data generation
+
+For generating benchmark input data, random data generation functions are provided in
+`cpp/benchmarks/utility/random.cuh`. The input data generation happens on device.
+
+## What should we benchmark?
+
+In general, we should benchmark all features over a range of data sizes and types, so that we can
+catch regressions across libcuspatial changes. However, running many benchmarks is expensive, so
+ideally we should sample the parameter space in such a way as to get good coverage without having
+to test exhaustively.
+
+A rule of thumb is that we should benchmark with enough data to reach the point where the algorithm
+reaches its saturation bottleneck, whether that bottleneck is bandwidth or computation. Using data
+sets larger than this point is generally not helpful, except in specific cases where doing so
+exercises different code and can therefore uncover regressions that smaller benchmarks will not
+(this should be rare).
@@ -1,11 +1,13 @@
-# libcuspatial C++ Developer Guide
+# libcuspatial C++ Developer Guide {#DEVELOPER_GUIDE}
 
 This document serves as a guide for contributors to libcuspatial C++ code. Developers should also
 refer to these additional files for further documentation of libcuspatial best practices.
 
 * [Documentation Guide](DOCUMENTATION.md) for guidelines on documenting libcuspatial code.
 * [Testing Guide](TESTING.md) for guidelines on writing unit tests.
 * [Benchmarking Guide](BENCHMARKING.md) for guidelines on writing unit benchmarks.
+* [Refactoring Guide](REFACTORING_GUIDE.md) for guidelines on refactoring legacy column-based APIs
+  into header-only APIs.
 
 # Overview
 
@@ -200,7 +202,7 @@ respectively. All memory resource parameters should be defaulted to use the retu
 
 This section provides specifics about the structure and implementation of cuSpatial API functions.
 
-## Column-based cuSpatial API<a name="column_based_api"></a>
+## Column-based cuSpatial API
 
 libcuspatial's column-based API is designed to integrate seamlessly with other RAPIDS libraries,
 notably cuDF. To that end, this API uses `cudf::column` and `cudf::table` data structures as input
@@ -321,7 +323,7 @@ auto foo = [&out0 = out0] {
 };
 ```
 
-## Header-only cuSpatial API<a name="header_only_api"></a>
+## Header-only cuSpatial API
 
 For C++ users and developers who do not also use libcudf or other RAPIDS APIs, depending on libcudf
 could be a barrier to adoption of libcuspatial. libcudf is a very large library and building it
@@ -532,7 +534,7 @@ libcuspatial code eschews raw pointers and direct memory allocation. Use RMM cla
 use [`device_memory_resource`](https://github.com/rapidsai/rmm/#device_memory_resource) for device
 memory allocation with automated lifetime management.
 
-#### `rmm::device_buffer`
+#### rmm::device_buffer
 Allocates a specified number of bytes of untyped, uninitialized device memory using a
 `device_memory_resource`. If no resource is explicitly provided, uses
 `rmm::mr::get_current_device_resource()`.
@@ -558,7 +560,7 @@ custom_memory_resource *mr...;
 rmm::device_buffer custom_buff(100, mr, stream);
 ```
 
-#### `rmm::device_scalar<T>`
+#### rmm::device_scalar<T>
 Allocates a single element of the specified type initialized to the specified value. Use this for
 scalar input/outputs into device kernels, e.g., reduction results, null count, etc. This is
 effectively a convenience wrapper around a `rmm::device_vector<T>` of length 1.
@@ -576,7 +578,7 @@ kernel<<<...>>>(int_scalar.data(),...);
 int host_value = int_scalar.value();
 ```
 
-#### `rmm::device_vector<T>`
+#### rmm::device_vector<T>
 
 Allocates a specified number of elements of the specified type. If no initialization value is
 provided, all elements are default initialized (this incurs a kernel launch).
@@ -590,7 +592,7 @@ utilities enable creation of `uvector`s from host-side vectors, or creating zero
 `uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has
 a number of benefits, as described in the following section on `rmm::device_uvector`.
 
-#### `rmm::device_uvector<T>`
+#### rmm::device_uvector<T>
 
 Similar to a `device_vector`, allocates a contiguous set of elements in device memory but with key
 differences:
@@ -632,7 +634,7 @@ group a broad set of functions, further namespaces may be used.
 Many functions are not meant for public use, so place them in either the `detail` or an *anonymous*
 namespace, depending on the situation.
 
-#### `detail` namespace
+#### detail namespace
 
 Functions or objects that will be used across *multiple* translation units (i.e., source files),
 should be exposed in an internal header file and placed in the `detail` namespace. Example: