Skip to content

Commit

Permalink
Consolidate size types 684 (nv-morpheus#747)
Browse files Browse the repository at this point in the history
* Redefine `TensorIndex` as `cudf::size_type` simplifying interoperability with cudf
* Replace usage of `size_t` for tensor and dataframe indexing with `TensorIndex`
* Replace direct usage of `vector` for storing shapes and strides with `ShapeType`
* Define a `RangeType` as an alias for `pair<TensorIndex, TensorIndex>`

Requires nv-morpheus/utilities#18 to be merged first.

This PR removes the need to perform several integer casts and to copy shape/stride arrays.
fixes nv-morpheus#684

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: nv-morpheus#747
  • Loading branch information
dagardner-nv authored Mar 13, 2023
1 parent 1854ba8 commit 193f89e
Show file tree
Hide file tree
Showing 60 changed files with 559 additions and 653 deletions.
2 changes: 1 addition & 1 deletion external/utilities
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@

#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand All @@ -47,14 +46,14 @@ class InferenceMemory : public TensorMemory
*
* @param count
*/
InferenceMemory(size_t count);
InferenceMemory(TensorIndex count);
/**
* @brief Construct a new Inference Memory object
*
* @param count
* @param tensors
*/
InferenceMemory(size_t count, TensorMap&& tensors);
InferenceMemory(TensorIndex count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
Expand All @@ -80,7 +79,7 @@ struct InferenceMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
* @param tensors : Map from string to CuPy arrays
* @return std::shared_ptr<InferenceMemory>
*/
static std::shared_ptr<InferenceMemory> init(std::size_t count, pybind11::object& tensors);
static std::shared_ptr<InferenceMemory> init(TensorIndex count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@

#include "morpheus/messages/memory/inference_memory.hpp"
#include "morpheus/objects/tensor_object.hpp"
#include "morpheus/types.hpp" // for TensorIndex

#include <cudf/types.hpp> // for size_type
#include <pybind11/pytypes.h> // for object

#include <cstddef>
#include <memory>

namespace morpheus {
Expand Down Expand Up @@ -51,7 +50,7 @@ class InferenceMemoryFIL : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
* inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids);
InferenceMemoryFIL(TensorIndex count, TensorObject&& input__0, TensorObject&& seq_ids);

/**
* @brief Returns the 'input__0' tensor, throws a `std::runtime_error` if it does not exist
Expand Down Expand Up @@ -102,7 +101,7 @@ struct InferenceMemoryFILInterfaceProxy : public InferenceMemoryInterfaceProxy
* inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
* @return std::shared_ptr<InferenceMemoryFIL>
*/
static std::shared_ptr<InferenceMemoryFIL> init(cudf::size_type count,
static std::shared_ptr<InferenceMemoryFIL> init(TensorIndex count,
pybind11::object input__0,
pybind11::object seq_ids);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@

#include "morpheus/messages/memory/inference_memory.hpp"
#include "morpheus/objects/tensor_object.hpp"
#include "morpheus/types.hpp" // for TensorIndex

#include <cudf/types.hpp> // for size_type
#include <pybind11/pytypes.h>

#include <cstddef>
#include <memory>

namespace morpheus {
Expand Down Expand Up @@ -52,7 +51,7 @@ class InferenceMemoryNLP : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryNLP(std::size_t count, TensorObject&& input_ids, TensorObject&& input_mask, TensorObject&& seq_ids);
InferenceMemoryNLP(TensorIndex count, TensorObject&& input_ids, TensorObject&& input_mask, TensorObject&& seq_ids);

/**
* @brief Get the input ids object
Expand Down Expand Up @@ -120,7 +119,7 @@ struct InferenceMemoryNLPInterfaceProxy : public InferenceMemoryInterfaceProxy
inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
* @return std::shared_ptr<InferenceMemoryNLP>
*/
static std::shared_ptr<InferenceMemoryNLP> init(cudf::size_type count,
static std::shared_ptr<InferenceMemoryNLP> init(TensorIndex count,
pybind11::object input_ids,
pybind11::object input_mask,
pybind11::object seq_ids);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@

#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand All @@ -47,14 +46,14 @@ class ResponseMemory : public TensorMemory
*
* @param count
*/
ResponseMemory(size_t count);
ResponseMemory(TensorIndex count);
/**
* @brief Construct a new Response Memory object
*
* @param count
* @param tensors
*/
ResponseMemory(size_t count, TensorMap&& tensors);
ResponseMemory(TensorIndex count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
Expand Down Expand Up @@ -82,7 +81,7 @@ struct ResponseMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
* @param tensors : Map from string to CuPy arrays
* @return std::shared_ptr<ResponseMemory>
*/
static std::shared_ptr<ResponseMemory> init(std::size_t count, pybind11::object& tensors);
static std::shared_ptr<ResponseMemory> init(TensorIndex count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@
#include "morpheus/objects/tensor_object.hpp"
#include "morpheus/types.hpp" // for TensorMap

#include <cudf/types.hpp>
#include <pybind11/pytypes.h>

#include <cstddef>
#include <memory>

namespace morpheus {
Expand All @@ -50,14 +48,14 @@ class ResponseMemoryProbs : public ResponseMemory
* @param count
* @param probs
*/
ResponseMemoryProbs(size_t count, TensorObject&& probs);
ResponseMemoryProbs(TensorIndex count, TensorObject&& probs);
/**
* @brief Construct a new Response Memory Probs object
*
* @param count
* @param tensors
*/
ResponseMemoryProbs(size_t count, TensorMap&& tensors);
ResponseMemoryProbs(TensorIndex count, TensorMap&& tensors);

/**
* @brief Returns the tensor named 'probs'. Alias for `get_tensor("probs")`
Expand Down Expand Up @@ -90,7 +88,7 @@ struct ResponseMemoryProbsInterfaceProxy : public ResponseMemoryInterfaceProxy
* @param probs
* @return std::shared_ptr<ResponseMemoryProbs>
*/
static std::shared_ptr<ResponseMemoryProbs> init(cudf::size_type count, pybind11::object probs);
static std::shared_ptr<ResponseMemoryProbs> init(TensorIndex count, pybind11::object probs);

/**
* @brief Get the response memory probs object ()
Expand Down
19 changes: 8 additions & 11 deletions morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,8 @@

#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <memory> // for shared_ptr
#include <string>
#include <utility> // for pair
#include <vector>

namespace morpheus {
Expand All @@ -53,18 +51,18 @@ class TensorMemory
*
* @param count
*/
TensorMemory(size_t count);
TensorMemory(TensorIndex count);

/**
* @brief Construct a new Tensor Memory object
*
* @param count
* @param tensors
*/
TensorMemory(size_t count, TensorMap&& tensors);
TensorMemory(TensorIndex count, TensorMap&& tensors);
virtual ~TensorMemory() = default;

size_t count{0};
TensorIndex count{0};
TensorMap tensors;

/**
Expand All @@ -83,8 +81,7 @@ class TensorMemory
* @param num_selected_rows
* @return TensorMap
*/
TensorMap copy_tensor_ranges(const std::vector<std::pair<TensorIndex, TensorIndex>>& ranges,
size_t num_selected_rows) const;
TensorMap copy_tensor_ranges(const std::vector<RangeType>& ranges, TensorIndex num_selected_rows) const;

/**
* @brief Get the tensor object identified by `name`
Expand Down Expand Up @@ -161,15 +158,15 @@ struct TensorMemoryInterfaceProxy
* @param tensors : Map from string to CuPy arrays
* @return std::shared_ptr<TensorMemory>
*/
static std::shared_ptr<TensorMemory> init(std::size_t count, pybind11::object& tensors);
static std::shared_ptr<TensorMemory> init(TensorIndex count, pybind11::object& tensors);

/**
* @brief Get the count object
*
* @param self
* @return std::size_t
* @return TensorIndex
*/
static std::size_t get_count(TensorMemory& self);
static TensorIndex get_count(TensorMemory& self);

/**
* @brief Get the tensors converted to CuPy arrays. Pybind11 will convert the std::map to a Python dict.
Expand Down
17 changes: 8 additions & 9 deletions morpheus/_lib/include/morpheus/messages/meta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@

#include "morpheus/objects/data_table.hpp" // for IDataTable
#include "morpheus/objects/table_info.hpp"
#include "morpheus/types.hpp" // for TensorIndex

#include <cudf/io/types.hpp>
#include <cudf/types.hpp> // for size_type
#include <pybind11/pytypes.h>

#include <cstddef> // for size_t
#include <memory>
#include <string>
#include <vector>
Expand Down Expand Up @@ -55,7 +54,7 @@ class MessageMeta
*
* @return pybind11::object
*/
size_t count() const;
TensorIndex count() const;

/**
* @brief Get the info object
Expand Down Expand Up @@ -111,17 +110,17 @@ class SlicedMessageMeta : public MessageMeta
{
public:
SlicedMessageMeta(std::shared_ptr<MessageMeta> other,
cudf::size_type start = 0,
cudf::size_type stop = -1,
TensorIndex start = 0,
TensorIndex stop = -1,
std::vector<std::string> columns = {});

TableInfo get_info() const override;

MutableTableInfo get_mutable_info() const override;

private:
cudf::size_type m_start{0};
cudf::size_type m_stop{-1};
TensorIndex m_start{0};
TensorIndex m_stop{-1};
std::vector<std::string> m_column_names;
};

Expand Down Expand Up @@ -152,9 +151,9 @@ struct MessageMetaInterfaceProxy
* @brief Get messages count
*
* @param self
* @return cudf::size_type
* @return TensorIndex
*/
static cudf::size_type count(MessageMeta& self);
static TensorIndex count(MessageMeta& self);

/**
* @brief Get a copy of the data frame object as a python object
Expand Down
Loading

0 comments on commit 193f89e

Please sign in to comment.