Benchmarks: Revision - Support flexible warmup and non-random data initialization in cublas-benchmark (#479)

**Description**
Revise cublas-benchmark to support flexible warmup and to fill the input
data with a fixed value in performance tests, improving running efficiency.

**Major Revision**
- Remove num_in_step from the warmup loop so that users can configure warmup
  more flexibly (see the usage sketch below)
- Add support for generating input filled with a fixed value in performance
  tests
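For reference, a minimal sketch of how the revised options might be exercised through SuperBench's Python API. It assumes the `cublas-function` benchmark id and the `BenchmarkRegistry`/`Platform` entry points used by the repository's example scripts; only the `--num_warmup` semantics and the `--random_data` flag come from this commit.

```python
# Hypothetical usage sketch, not part of this commit.
from superbench.benchmarks import BenchmarkRegistry, Platform

context = BenchmarkRegistry.create_benchmark_context(
    'cublas-function',
    platform=Platform.CUDA,
    # num_warmup now counts individual warmup calls (default 8 * 1000),
    # and --random_data opts back in to randomized inputs for perf runs.
    parameters='--num_warmup 2000 --num_steps 100 --num_in_step 1000 --random_data'
)

benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark:
    print(benchmark.result)
```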
yukirora authored Feb 27, 2023
1 parent 0292366 commit eba298f
Showing 4 changed files with 93 additions and 51 deletions.
19 changes: 15 additions & 4 deletions superbench/benchmarks/micro_benchmarks/cublas_function.py
@@ -190,23 +190,26 @@ def add_parser_arguments(self):
         self._parser.add_argument(
             '--num_warmup',
             type=int,
-            default=8,
+            default=8 * 1000,
             required=False,
-            help='The number of warmup step.',
+            help='The number of functions for warmup. By default, the total number of functions to run in warmup ' +
+            'is 8 warmup steps * 1000 num_in_step.',
         )
         self._parser.add_argument(
             '--num_steps',
             type=int,
             default=100,
             required=False,
-            help='The number of test step.',
+            help='The number of test steps. By default, the total number of functions to run in the measured test ' +
+            'is 100 test steps * 1000 num_in_step.',
         )
         self._parser.add_argument(
             '--num_in_step',
             type=int,
             default=1000,
             required=False,
-            help='The number of functions in one step.',
+            help='The number of functions in one step. By default, the total number of functions to run ' +
+            'in each step is 1000.',
         )
         self._parser.add_argument(
             '--random_seed',
@@ -236,6 +239,13 @@ def add_parser_arguments(self):
             required=False,
             help='The acceptable error bound for correctness check.',
         )
+        self._parser.add_argument(
+            '--random_data',
+            action='store_true',
+            default=False,
+            help='Enable random data generation for performance test. ' +
+            'By default, the data is filled with fixed value for performance test.',
+        )
 
     def _preprocess(self):
         """Preprocess/preparation operations before the benchmarking.
@@ -253,6 +263,7 @@ def _preprocess(self):
         command += (' --random_seed ' + str(self._args.random_seed))
         command += ' --correctness' if self._args.correctness else ''
         command += (' --eps ' + str(self._args.eps)) if self._args.eps is not None else ''
+        command += ' --random_data' if self._args.random_data else ''
 
         try:
             if not self._args.config_json_str:
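Because the `num_in_step` multiplier is gone from the warmup loop (see the `benchmark()` hunk below), `num_warmup` now directly counts warmup kernel invocations rather than steps. A quick sanity check of the accounting, in plain Python:

```python
# Old behavior: warmup issued num_warmup steps of num_in_step calls each.
old_warmup_calls = 8 * 1000       # num_warmup=8, num_in_step=1000

# New behavior: num_warmup itself is the call count; the default was bumped
# to 8 * 1000 so out-of-the-box totals are unchanged, while any other value
# (e.g. 500) is now expressible without touching num_in_step.
new_default_warmup_calls = 8 * 1000

assert old_warmup_calls == new_default_warmup_calls
```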
@@ -54,6 +54,7 @@ class CublasFunction {
     int random_seed;   ///< the random seed used to generate random data
     double eps;        ///< the acceptable error bound for numeric stability
     bool correctness;  ///< whether enable correctness check or not
+    bool random_data;  ///< whether enable random data generation or not
     std::string name_; ///< the name of the cublas function
     int m_;            ///< the m dim of matrix
     int k_;            ///< the k dim of matrix
@@ -70,17 +71,17 @@ class CublasFunction {
     /**
      * @brief Fill the random data into the input
      */
-    template <typename T> void fill_data(T *Parameter_0_0_host, T *Parameter_1_0_host);
+    template <typename T> void fill_data(T *Parameter_0_0_host, T *Parameter_1_0_host, bool random = true);
     /**
      * @brief Prepare memory and data of the input and output
      */
     template <typename T>
     void prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0, T **Parameter_0_0_host,
-                                 T **Parameter_1_0_host);
+                                 T **Parameter_1_0_host, bool random = true);
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {}
+    virtual void prepare_tensor(bool random = true) {}
     /**
      * @brief Execute the kernel/function
      */
@@ -140,11 +141,15 @@ class CublasFunction {
      * @param eps the acceptable error bound for numeric stability
      */
     void set_eps(double eps) { this->eps = eps; }
+    /**
+     * @brief Set the random data
+     * @param random_data if generate random data
+     */
+    void set_random_data(bool random_data) { this->random_data = random_data; }
     /**
      * @brief Set the params string
      * @param str the str representing the params of the function
      */
-
     void set_function(std::string &str) { this->function_str_ = str; }
     /**
      * @brief Set the name member
@@ -228,47 +233,67 @@ class CublasFunction {
 /**
  * @brief Fill the random data into the input in float type
  */
-template <> void CublasFunction::fill_data(float *Parameter_0_0_host, float *Parameter_1_0_host) {
-    srand(random_seed);
-    for (int i = 0; i < m_ * k_ * batch_count_; i++) {
-        Parameter_0_0_host[i] = ((float)rand() / (float)(RAND_MAX));
-    }
-    for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
-        Parameter_1_0_host[i] = ((float)rand() / (float)(RAND_MAX));
+template <> void CublasFunction::fill_data(float *Parameter_0_0_host, float *Parameter_1_0_host, bool random) {
+    if (random) {
+        srand(random_seed);
+        for (int i = 0; i < m_ * k_ * batch_count_; i++) {
+            Parameter_0_0_host[i] = ((float)rand() / (float)(RAND_MAX));
+        }
+        for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
+            Parameter_1_0_host[i] = ((float)rand() / (float)(RAND_MAX));
+        }
+    } else {
+        // memset the input data to fixed float value
+        memset(Parameter_0_0_host, 2,
+               (unsigned long)m_ * (unsigned long)k_ * (unsigned long)batch_count_ * sizeof(float));
+        memset(Parameter_1_0_host, 3,
+               (unsigned long)k_ * (unsigned long)n_ * (unsigned long)batch_count_ * sizeof(float));
     }
 }
 /**
  * @brief Fill the random data into the input in half type
  */
-template <> void CublasFunction::fill_data(half *Parameter_0_0_host, half *Parameter_1_0_host) {
-    srand(random_seed);
-    for (int i = 0; i < m_ * k_ * batch_count_; i++) {
-        Parameter_0_0_host[i] = half((float)rand() / (float)(RAND_MAX));
-    }
-    for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
-        Parameter_1_0_host[i] = half((float)rand() / (float)(RAND_MAX));
+template <> void CublasFunction::fill_data(half *Parameter_0_0_host, half *Parameter_1_0_host, bool random) {
+    if (random) {
+        srand(random_seed);
+        for (int i = 0; i < m_ * k_ * batch_count_; i++) {
+            Parameter_0_0_host[i] = half((float)rand() / (float)(RAND_MAX));
+        }
+        for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
+            Parameter_1_0_host[i] = half((float)rand() / (float)(RAND_MAX));
+        }
+    } else {
+        // memset the input data to fixed float value
+        std::fill(Parameter_0_0_host, Parameter_0_0_host + m_ * k_ * batch_count_, half(2.0));
+        std::fill(Parameter_1_0_host, Parameter_1_0_host + k_ * n_ * batch_count_, half(3.0));
     }
 }
 /**
  * @brief Fill the random data into the input in cuComplex type
  */
-template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComplex *Parameter_1_0_host) {
-    srand(random_seed);
-    for (int i = 0; i < m_ * k_ * batch_count_; i++) {
-        Parameter_0_0_host[i] =
-            make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
-    }
-    for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
-        Parameter_1_0_host[i] =
-            make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
+template <> void CublasFunction::fill_data(cuComplex *Parameter_0_0_host, cuComplex *Parameter_1_0_host, bool random) {
+    if (random) {
+        srand(random_seed);
+        for (int i = 0; i < m_ * k_ * batch_count_; i++) {
+            Parameter_0_0_host[i] =
+                make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
+        }
+        for (int i = 0; i < k_ * n_ * batch_count_; ++i) {
+            Parameter_1_0_host[i] =
+                make_cuComplex(((float)rand() / (float)(RAND_MAX)), ((float)rand() / (float)(RAND_MAX)));
+        }
+    } else {
+        // memset the input data to fixed float value
+        std::fill(Parameter_0_0_host, Parameter_0_0_host + m_ * k_ * batch_count_, make_cuComplex(2.0f, 2.0f));
+        std::fill(Parameter_1_0_host, Parameter_1_0_host + k_ * n_ * batch_count_, make_cuComplex(3.0f, 3.0f));
     }
 }
 /**
  * @brief Prepare memory and data of the input and output
  */
 template <typename T>
 void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_0, T **Result_3_0,
-                                             T **Parameter_0_0_host, T **Parameter_1_0_host) {
+                                             T **Parameter_0_0_host, T **Parameter_1_0_host, bool random) {
     int m = this->m_, n = this->n_, k = this->k_, batch_count = this->batch_count_;
     // input argument
     CUDA_SAFE_CALL(cudaMallocHost((void **)Parameter_0_0_host, sizeof(T) * m * k * batch_count_));
@@ -278,7 +303,7 @@ void CublasFunction::prepare_tensor_template(T **Parameter_0_0, T **Parameter_1_
     CUDA_SAFE_CALL(cudaMalloc((void **)Parameter_1_0, sizeof(T) * n * k * batch_count_));
 
     // fill input values
-    fill_data(reinterpret_cast<T *>(*Parameter_0_0_host), reinterpret_cast<T *>(*Parameter_1_0_host));
+    fill_data(reinterpret_cast<T *>(*Parameter_0_0_host), reinterpret_cast<T *>(*Parameter_1_0_host), random);
 
     // copy input data from host to device
     CUDA_SAFE_CALL(
@@ -469,13 +494,12 @@ int CublasFunction::check_result(int batch_count, cuComplex *Result_3_0, std::co
  */
 void CublasFunction::benchmark() {
     // Malloc memory for input and output data
-    this->prepare_tensor();
+    bool random = this->correctness ? true : this->random_data;
+    this->prepare_tensor(random);
 
     // Warm up
     for (int i_ = 0; i_ < warm_up; i_++) {
-        for (int j = 0; j < num_in_step; j++) {
-            this->kernel_entry();
-        }
+        this->kernel_entry();
     }
     CUDA_SAFE_CALL(cudaDeviceSynchronize());
 
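One subtlety in the fixed-value path: for `float` inputs the code uses `memset`, which writes the fill value into every byte, so the buffers hold the bit patterns `0x02020202` and `0x03030303` rather than 2.0f and 3.0f. Each element is a tiny but valid constant, which is all a throughput run needs; the `half` and `cuComplex` paths use `std::fill` with real 2/3 values instead. Note also that `benchmark()` forces random data whenever the correctness check is on, so the CPU-reference comparison never runs on constant inputs. The `memset` effect can be confirmed in plain Python:

```python
import struct

# memset(ptr, 2, nbytes) fills every byte with 0x02, so each 4-byte float
# element becomes the bit pattern 0x02020202 -- not 2.0f.
value = struct.unpack('<f', bytes([0x02] * 4))[0]
print(value)        # ~9.55e-38: a small but valid normal float
assert value != 2.0
```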
@@ -39,8 +39,9 @@ class SgemmFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
-        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host);
+    virtual void prepare_tensor(bool random) {
+        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
+                                random);
     }
     /**
      * @brief Check the correctness of function calculation result
@@ -107,8 +108,9 @@ class CgemmFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
-        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host);
+    virtual void prepare_tensor(bool random) {
+        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
+                                random);
     }
     /**
      * @brief Check the correctness of function calculation result
@@ -169,17 +171,17 @@ class GemmExFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
+    virtual void prepare_tensor(bool random) {
         if (this->datatype_.compare("half") == 0) {
             CublasFunction::prepare_tensor_template<half>(
                 reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0),
                 reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host),
-                reinterpret_cast<half **>(&Parameter_1_0_host));
+                reinterpret_cast<half **>(&Parameter_1_0_host), random);
         } else if (this->datatype_.compare("float") == 0) {
             CublasFunction::prepare_tensor_template<float>(
                 reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0),
                 reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host),
-                reinterpret_cast<float **>(&Parameter_1_0_host));
+                reinterpret_cast<float **>(&Parameter_1_0_host), random);
         }
     }
     /**
@@ -265,17 +267,17 @@ class GemmStridedBatchedExFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
+    virtual void prepare_tensor(bool random) {
         if (this->datatype_.compare("half") == 0) {
             prepare_tensor_template<half>(
                 reinterpret_cast<half **>(&Parameter_0_0), reinterpret_cast<half **>(&Parameter_1_0),
                 reinterpret_cast<half **>(&Result_3_0), reinterpret_cast<half **>(&Parameter_0_0_host),
-                reinterpret_cast<half **>(&Parameter_1_0_host));
+                reinterpret_cast<half **>(&Parameter_1_0_host), random);
         } else if (this->datatype_.compare("float") == 0) {
             prepare_tensor_template<float>(
                 reinterpret_cast<float **>(&Parameter_0_0), reinterpret_cast<float **>(&Parameter_1_0),
                 reinterpret_cast<float **>(&Result_3_0), reinterpret_cast<float **>(&Parameter_0_0_host),
-                reinterpret_cast<float **>(&Parameter_1_0_host));
+                reinterpret_cast<float **>(&Parameter_1_0_host), random);
         }
     }
     /**
@@ -355,8 +357,9 @@ class SgemmStridedBatchedFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
-        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host);
+    virtual void prepare_tensor(bool random) {
+        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
+                                random);
     }
     /**
      * @brief Function calculation on CPU side
@@ -419,8 +422,9 @@ class Cgemm3mStridedBatchedFunction : public CublasFunction {
     /**
      * @brief Prepare memory and data of the input and output for kernel running
      */
-    virtual void prepare_tensor() {
-        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host);
+    virtual void prepare_tensor(bool random) {
+        prepare_tensor_template(&Parameter_0_0, &Parameter_1_0, &Result_3_0, &Parameter_0_0_host, &Parameter_1_0_host,
+                                random);
     }
     /**
      * @brief Function calculation on CPU side
@@ -98,6 +98,7 @@ class Options {
     std::string para_info_json;
    bool correctness_check;
    double eps;
+    bool random_data;
 
     /**
      * @brief Construct a options object according to cmd or set a default value used to test
@@ -120,6 +121,7 @@
                              : para_info_json;
         correctness_check = get_cmd_line_argument_bool("--correctness");
         eps = get_cmd_line_argument_double("--eps");
+        random_data = get_cmd_line_argument_bool("--random_data");
     }
 };
 
@@ -241,6 +243,7 @@ void run_benchmark(Options &options) {
     function.set_random_seed(options.random_seed);
     function.set_correctness(options.correctness_check);
     function.set_eps(options.eps);
+    function.set_random_data(options.random_data);
     CublasFunction *p_function = get_cublas_function_pointer(function);
     p_function->benchmark();
     delete p_function;
