Skip to content

Commit

Permalink
Merge branch 'branch-21.08' into repeat_strings
Browse files Browse the repository at this point in the history
  • Loading branch information
ttnghia committed Jun 18, 2021
2 parents 2639f9a + d183d50 commit dbbfbf9
Show file tree
Hide file tree
Showing 79 changed files with 1,483 additions and 1,155 deletions.
4 changes: 4 additions & 0 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ else
else
"$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
fi

# If the examples grow too large to build here, move this step to the CPU-side build
gpuci_logger "Building libcudf examples"
$WORKSPACE/cpp/examples/build.sh
fi

# Both regular and Project Flash proceed here
Expand Down
5 changes: 4 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,7 @@ sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile

# README.md update
sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md

# Libcudf examples update
sed_runner "s/CUDF_TAG \"branch-${CURRENT_SHORT_TAG}\"/CUDF_TAG \"branch-${NEXT_SHORT_TAG}\"/" cpp/examples/basic/CMakeLists.txt
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,6 @@ add_library(cudf
src/strings/regex/regexec.cu
src/strings/repeat_strings.cu
src/strings/replace/backref_re.cu
src/strings/replace/backref_re_large.cu
src/strings/replace/backref_re_medium.cu
src/strings/replace/multi_re.cu
src/strings/replace/replace.cu
src/strings/replace/replace_re.cu
Expand Down
8 changes: 8 additions & 0 deletions cpp/examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Libcudf Examples

This folder contains examples to demonstrate libcudf use cases. Running `build.sh` builds all
libcudf examples.

Current examples:

- Basic: demonstrates a basic libcudf use case and how to build a custom application with libcudf.
21 changes: 21 additions & 0 deletions cpp/examples/basic/4stock_5day.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Company,Date,Open,High,Low,Close,Volume
MSFT,2021-03-03,232.16000366210938,233.5800018310547,227.25999450683594,227.55999755859375,33950400.0
MSFT,2021-03-04,226.74000549316406,232.49000549316406,224.25999450683594,226.72999572753906,44584200.0
MSFT,2021-03-05,229.52000427246094,233.27000427246094,226.4600067138672,231.60000610351562,41842100.0
MSFT,2021-03-08,231.3699951171875,233.3699951171875,227.1300048828125,227.38999938964844,35245900.0
MSFT,2021-03-09,232.8800048828125,235.3800048828125,231.6699981689453,233.77999877929688,33034000.0
GOOG,2021-03-03,2067.2099609375,2088.51806640625,2010.0,2026.7099609375,1483100.0
GOOG,2021-03-04,2023.3699951171875,2089.239990234375,2020.27001953125,2049.090087890625,2116100.0
GOOG,2021-03-05,2073.1201171875,2118.110107421875,2046.4150390625,2108.5400390625,2193800.0
GOOG,2021-03-08,2101.1298828125,2128.81005859375,2021.6099853515625,2024.1700439453125,1646000.0
GOOG,2021-03-09,2070.0,2078.0400390625,2047.8299560546875,2052.699951171875,1696400.0
AMZN,2021-03-03,3081.179931640625,3107.780029296875,2995.0,3005.0,3967200.0
AMZN,2021-03-04,3012.0,3058.1298828125,2945.429931640625,2977.570068359375,5458700.0
AMZN,2021-03-05,3005.0,3009.0,2881.0,3000.4599609375,5383400.0
AMZN,2021-03-08,3015.0,3064.590087890625,2951.31005859375,2951.949951171875,4178500.0
AMZN,2021-03-09,3017.989990234375,3090.9599609375,3005.14990234375,3062.85009765625,4023500.0
AAPL,2021-03-03,124.80999755859375,125.70999908447266,121.83999633789062,122.05999755859375,112430400.0
AAPL,2021-03-04,121.75,123.5999984741211,118.62000274658203,120.12999725341797,177275300.0
AAPL,2021-03-05,120.9800033569336,121.94000244140625,117.56999969482422,121.41999816894531,153590400.0
AAPL,2021-03-08,120.93000030517578,121.0,116.20999908447266,116.36000061035156,153918600.0
AAPL,2021-03-09,119.02999877929688,122.05999755859375,118.79000091552734,121.08999633789062,129159600.0
30 changes: 30 additions & 0 deletions cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Minimal standalone project showing how to build an application against libcudf.
# cudf is located with CPMFindPackage: an installed cudf package is used when one is
# found, otherwise the sources are fetched from GitHub and built as part of this project.
cmake_minimum_required(VERSION 3.18)

project(basic_example VERSION 0.0.1 LANGUAGES C CXX CUDA)

# The example sources are compiled as C++17; fail at configure time rather than
# silently falling back to an older standard when the compiler cannot provide it.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# NOTE(review): intentionally left empty, as in the original; presumably this lets the
# cudf build pick its own set of GPU architectures -- confirm against cudf's CMake.
set(CMAKE_CUDA_ARCHITECTURES "")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# CPM.cmake is downloaded on the first configure and cached inside the build tree.
set(CPM_DOWNLOAD_VERSION 0.27.2)
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")

# cudf branch to fetch when no installed cudf package is found.
# Keep the exact `CUDF_TAG "branch-XX.YY"` form: the release tooling rewrites it with sed.
set(CUDF_TAG "branch-21.08")

if(NOT EXISTS "${CPM_DOWNLOAD_LOCATION}")
  message(STATUS "Downloading CPM.cmake")
  file(DOWNLOAD
    "https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake"
    "${CPM_DOWNLOAD_LOCATION}"
    STATUS cpm_download_status
  )
  # STATUS is a two-element list: numeric code (0 on success) and a message.
  list(GET cpm_download_status 0 cpm_download_code)
  if(NOT cpm_download_code EQUAL 0)
    list(GET cpm_download_status 1 cpm_download_message)
    # Remove the partial/empty file so the next configure retries the download
    # instead of including a broken script.
    file(REMOVE "${CPM_DOWNLOAD_LOCATION}")
    message(FATAL_ERROR "Failed to download CPM.cmake: ${cpm_download_message}")
  endif()
endif()

include("${CPM_DOWNLOAD_LOCATION}")

CPMFindPackage(NAME cudf
  GIT_REPOSITORY https://github.com/rapidsai/cudf
  GIT_TAG ${CUDF_TAG}
  GIT_SHALLOW TRUE
  SOURCE_SUBDIR cpp
)

# Configure your project here
add_executable(${PROJECT_NAME} "src/process_csv.cpp")
target_link_libraries(${PROJECT_NAME} PRIVATE cudf::cudf)
23 changes: 23 additions & 0 deletions cpp/examples/basic/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Basic Standalone libcudf C++ application

This C++ example demonstrates a basic libcudf use case and provides a minimal
example of building your own application based on libcudf using CMake.

The example source code loads a CSV file that contains stock prices of 4
companies over 5 days, computes the average closing price for each company,
and writes the result in CSV format.

## Compile and execute

```bash
# Configure project
cmake -S . -B build/
# Build
cmake --build build/ --parallel $PARALLEL_LEVEL
# Execute
build/basic_example
```

If no pre-built libcudf is found on your machine, expect the first build to
take a while, because libcudf itself is then built from source on the host.
Setting `PARALLEL_LEVEL` to a suitable number of parallel jobs can speed this up.
68 changes: 68 additions & 0 deletions cpp/examples/basic/src/process_csv.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#include <cudf/aggregation.hpp>
#include <cudf/groupby.hpp>
#include <cudf/io/csv.hpp>
#include <cudf/table/table.hpp>

#include <memory>
#include <string>
#include <utility>
#include <vector>

/**
 * @brief Reads a CSV file into a libcudf table.
 *
 * No reader options are customised; the options object is built directly from the
 * source path.
 *
 * @param file_path Path of the CSV file to read
 * @return The table read from the file, together with its metadata
 */
cudf::io::table_with_metadata read_csv(std::string const& file_path)
{
  auto reader_options =
    cudf::io::csv_reader_options::builder(cudf::io::source_info(file_path)).build();
  return cudf::io::read_csv(reader_options);
}

/**
 * @brief Writes a table to a file in CSV format.
 *
 * No writer options are customised; the options object is built directly from the
 * sink path and the table view.
 *
 * @param tbl_view  View of the table to write
 * @param file_path Path of the output CSV file
 */
void write_csv(cudf::table_view const& tbl_view, std::string const& file_path)
{
  auto writer_options =
    cudf::io::csv_writer_options::builder(cudf::io::sink_info(file_path), tbl_view).build();
  cudf::io::write_csv(writer_options);
}

/**
 * @brief Builds a request list holding exactly one aggregation over one value column.
 *
 * @param agg   The aggregation to perform; ownership is moved into the request
 * @param value The column the aggregation is applied to
 * @return A vector with a single aggregation request
 */
std::vector<cudf::groupby::aggregation_request> make_single_aggregation_request(
  std::unique_ptr<cudf::aggregation>&& agg, cudf::column_view value)
{
  // Fill in the single request first, then move it into the returned vector.
  cudf::groupby::aggregation_request single_request;
  single_request.aggregations.push_back(std::move(agg));
  single_request.values = value;

  std::vector<cudf::groupby::aggregation_request> request_list;
  request_list.push_back(std::move(single_request));
  return request_list;
}

/**
 * @brief Computes the mean closing price per company.
 *
 * Groups the rows by the company column (index 0) and takes the mean of the closing
 * price column (index 5).
 *
 * @param stock_info_table Table laid out as
 *        | Company | Date | Open | High | Low | Close | Volume |
 * @return A new two-column table: the group keys (company) and the mean closing price
 */
std::unique_ptr<cudf::table> average_closing_price(cudf::table_view stock_info_table)
{
  // Schema: | Company | Date | Open | High | Low | Close | Volume |
  auto closing_prices = stock_info_table.column(5);                       // Close
  auto company_keys   = cudf::table_view{{stock_info_table.column(0)}};  // Company

  // One mean aggregation over the whole closing-price column, grouped by company
  cudf::groupby::groupby grouper(company_keys);
  auto mean_requests =
    make_single_aggregation_request(cudf::make_mean_aggregation(), closing_prices);

  auto grouped = grouper.aggregate(mean_requests);

  // Take ownership of the pieces we need: the unique keys and the single result column
  // (first request, first aggregation).
  auto unique_companies = std::move(grouped.first);
  auto mean_close       = std::move(grouped.second[0].results[0]);

  // Stitch keys and means into one view, then copy it into an owning table.
  std::vector<cudf::column_view> output_columns{unique_companies->get_column(0), *mean_close};
  return std::make_unique<cudf::table>(cudf::table_view(output_columns));
}

/**
 * @brief Example entry point: read the stock CSV, average the closing prices per
 * company, and write the result to a new CSV file.
 *
 * Both file names are relative paths, so they are resolved against the current
 * working directory. The command-line arguments are not used.
 */
int main(int argc, char** argv)
{
  // Load the input data
  auto input = read_csv("4stock_5day.csv");

  // Compute the per-company average closing price
  auto averages = average_closing_price(input.tbl->view());

  // Store the result
  write_csv(averages->view(), "4stock_5day_avg_close.csv");

  return 0;
}
22 changes: 22 additions & 0 deletions cpp/examples/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Copyright (c) 2021, NVIDIA CORPORATION.

# libcudf examples build script
#
# Configures and builds every libcudf example with CMake.
#
# Environment variables:
#   PARALLEL_LEVEL  Number of parallel build jobs (default: 4)
#   WORKSPACE       Repository root; when unset, the examples directory is taken
#                   to be the directory containing this script

# Stop at the first failing command so a failed configure is not followed by a build.
set -e

# Add libcudf examples build scripts down below

# Parallelism control
PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

# Locate the examples directory. Fall back to this script's own location so the
# script also works when invoked directly, without WORKSPACE in the environment.
if [[ -n "${WORKSPACE:-}" ]]; then
    EXAMPLES_DIR="${WORKSPACE}/cpp/examples"
else
    EXAMPLES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
fi

################################################################################
# Basic example
BASIC_EXAMPLE_DIR="${EXAMPLES_DIR}/basic"
BASIC_EXAMPLE_BUILD_DIR="${BASIC_EXAMPLE_DIR}/build"

# Configure
cmake -S "${BASIC_EXAMPLE_DIR}" -B "${BASIC_EXAMPLE_BUILD_DIR}"
# Build
cmake --build "${BASIC_EXAMPLE_BUILD_DIR}" -j"${PARALLEL_LEVEL}"
42 changes: 29 additions & 13 deletions cpp/include/cudf_test/iterator_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@

#include <cudf/detail/iterator.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <thrust/iterator/transform_iterator.h>

#include <iterator>

namespace cudf {
namespace test {

namespace iterators {
/**
* @brief Bool iterator for marking (possibly multiple) null elements in a column_wrapper.
*
Expand All @@ -35,7 +34,7 @@ namespace test {
*
* @code
* auto indices = std::vector<size_type>{8,9};
* auto iter = iterator_with_null_at(indices.cbegin(), indices.end());
* auto iter = nulls_at(indices.cbegin(), indices.end());
* iter[6] == true; // i.e. Valid row at index 6.
* iter[7] == true; // i.e. Valid row at index 7.
* iter[8] == false; // i.e. Invalid row at index 8.
Expand All @@ -49,7 +48,7 @@ namespace test {
* @return auto Validity iterator
*/
template <typename Iter>
[[maybe_unused]] static auto iterator_with_null_at(Iter index_start, Iter index_end)
[[maybe_unused]] static auto nulls_at(Iter index_start, Iter index_end)
{
using index_type = typename std::iterator_traits<Iter>::value_type;

Expand All @@ -66,8 +65,7 @@ template <typename Iter>
* and yields `true` (to mark valid rows) for all other indices. E.g.
*
* @code
* using host_span = cudf::host_span<cudf::size_type const>;
* auto iter = iterator_with_null_at(host_span{std::vector<size_type>{8,9}});
* auto iter = nulls_at({8,9});
* iter[6] == true; // i.e. Valid row at index 6.
* iter[7] == true; // i.e. Valid row at index 7.
* iter[8] == false; // i.e. Invalid row at index 8.
Expand All @@ -77,9 +75,9 @@ template <typename Iter>
* @param indices The indices for which the validity iterator must return `false` (i.e. null)
* @return auto Validity iterator
*/
[[maybe_unused]] static auto iterator_with_null_at(cudf::host_span<cudf::size_type const> indices)
[[maybe_unused]] static auto nulls_at(std::vector<cudf::size_type> const& indices)
{
return iterator_with_null_at(indices.begin(), indices.end());
return nulls_at(indices.cbegin(), indices.cend());
}

/**
Expand All @@ -89,32 +87,50 @@ template <typename Iter>
* and yields `true` (to mark valid rows) for all other indices. E.g.
*
* @code
* auto iter = iterator_with_null_at(8);
* auto iter = null_at(8);
* iter[7] == true; // i.e. Valid row at index 7.
* iter[8] == false; // i.e. Invalid row at index 8.
* @endcode
*
* @param index The index for which the validity iterator must return `false` (i.e. null)
* @return auto Validity iterator
*/
[[maybe_unused]] static auto iterator_with_null_at(cudf::size_type index)
[[maybe_unused]] static auto null_at(cudf::size_type index)
{
return iterator_with_null_at(std::vector<size_type>{index});
return nulls_at(std::vector<cudf::size_type>{index});
}

/**
* @brief Bool iterator for marking all elements are null
*
* @return auto Validity iterator which always yields `false`
*/
[[maybe_unused]] static auto iterator_all_nulls() { return thrust::make_constant_iterator(false); }
[[maybe_unused]] static auto all_nulls() { return thrust::make_constant_iterator(false); }

/**
* @brief Bool iterator for marking all elements are valid (non-null)
*
* @return auto Validity iterator which always yields `true`
*/
[[maybe_unused]] static auto iterator_no_null() { return thrust::make_constant_iterator(true); }
[[maybe_unused]] static auto no_nulls() { return thrust::make_constant_iterator(true); }

/**
* @brief Bool iterator for marking null elements from pointers of data
*
* The returned iterator yields `false` (to mark `null`) at the indices corresponding to the
* pointers having `nullptr` values and `true` for the remaining indices.
*
* @tparam T the data type
* @param ptrs The data pointers for which the validity iterator is computed
* @return auto Validity iterator
*/
template <class T>
[[maybe_unused]] static auto nulls_from_nullptrs(std::vector<T const*> const& ptrs)
{
// The vector `indices` is copied into the lambda as it can be destroyed at the caller site.
return thrust::make_transform_iterator(ptrs.begin(), [ptrs](auto ptr) { return ptr != nullptr; });
}

} // namespace iterators
} // namespace test
} // namespace cudf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function(find_and_configure_cudf VERSION)
endif()
endfunction()

set(CUDA_KAFKA_MIN_VERSION_cudf "${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.00")
set(CUDA_KAFKA_MIN_VERSION_cudf "${CUDA_KAFKA_VERSION_MAJOR}.${CUDA_KAFKA_VERSION_MINOR}.${CUDA_KAFKA_VERSION_PATCH}")
find_and_configure_cudf(${CUDA_KAFKA_MIN_VERSION_cudf})

if(cudf_ADDED)
Expand Down
Loading

0 comments on commit dbbfbf9

Please sign in to comment.