Skip to content

Commit e75ed7a

Browse files
prutskov authored and pitrou committed
Move readCSV bench in converter_benchmark (#6)
1 parent 1c0814a commit e75ed7a

File tree

3 files changed

+78
-109
lines changed

3 files changed

+78
-109
lines changed

cpp/src/arrow/csv/converter_benchmark.cc

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,96 @@
1717

1818
#include "benchmark/benchmark.h"
1919

20+
#include <fstream>
2021
#include <sstream>
2122
#include <string>
2223

24+
#include "arrow/io/file.h"
2325
#include "arrow/csv/converter.h"
2426
#include "arrow/csv/options.h"
2527
#include "arrow/csv/parser.h"
28+
#include "arrow/csv/reader.h"
2629
#include "arrow/csv/test_common.h"
2730
#include "arrow/testing/gtest_util.h"
31+
#include "arrow/util/timestamp_converter.h"
2832

2933
namespace arrow {
3034
namespace csv {
3135

36+
// Cell templates used to fill the generated CSV file: one timestamp per
// precision (date, hour, minute, second). Each entry already carries its
// trailing comma, so cells can be written back-to-back.
const std::vector<std::string> formats{
    "1917-10-17,",
    "2018-09-13 22,",
    "1941-06-22 04:00,",
    "1945-05-09 09:45:38,",
};

// Path of the temporary CSV file created and removed by the benchmark fixture.
const std::string file_name{"__data.csv"};

// Dimensions of the generated CSV file.
constexpr int32_t n_rows = 10000;
constexpr int32_t n_cols = 150;
42+
43+
class ReadCSVBenchmark : public benchmark::Fixture {
44+
public:
45+
void SetUp(const ::benchmark::State& state) {
46+
generateCSV(file_name, formats, n_cols, n_rows);
47+
}
48+
49+
void TearDown(const ::benchmark::State& state) { std::remove(file_name.c_str()); }
50+
51+
private:
52+
void generateCSV(const std::string& path_csv, const std::vector<std::string>& dates,
53+
int32_t cols, int32_t rows) {
54+
::srand(777);
55+
std::ofstream file;
56+
file.open(path_csv, std::ios::out);
57+
58+
for (int32_t row = 0; row < rows; ++row) {
59+
for (int32_t col = 0; col < cols; ++col) {
60+
file << dates[rand() % dates.size()];
61+
}
62+
file << "\n";
63+
}
64+
file.close();
65+
}
66+
};
67+
68+
/// \brief Read the CSV file at `path_csv` into an in-memory Table and discard it.
///
/// Default read and parse options are used; only the conversion options are
/// supplied by the caller. Every fallible step is unwrapped with
/// `ValueOrDie()`, which is the sole error check here (per Arrow's `Result`
/// contract it terminates the process on failure), so no separate `Status`
/// bookkeeping is kept.
///
/// \param path_csv    path of the CSV file to read
/// \param convert_opt conversion options handed to the table reader
static void readCSV(const std::string& path_csv, const csv::ConvertOptions& convert_opt) {
  auto memory_pool = default_memory_pool();

  std::shared_ptr<io::ReadableFile> input =
      io::ReadableFile::Open(path_csv).ValueOrDie();

  // Kept as named locals so they outlive the reader for the whole read.
  auto read_opt = csv::ReadOptions::Defaults();
  auto parse_opt = csv::ParseOptions::Defaults();

  auto table_reader =
      csv::TableReader::Make(memory_pool, input, read_opt, parse_opt, convert_opt)
          .ValueOrDie();

  // The table itself is not used; materializing it is the work being measured.
  std::shared_ptr<Table> arrowTable = table_reader->Read().ValueOrDie();
}
91+
92+
// Benchmark: read the generated CSV file with an ISO8601Parser (second
// resolution) registered as timestamp converter.
BENCHMARK_F(ReadCSVBenchmark, ConverterISO)(benchmark::State& state) {
  // The options do not change between iterations; build them once, outside
  // the timed loop, so that only the CSV read itself is measured.
  auto convert_options = csv::ConvertOptions::Defaults();
  convert_options.timestamp_converters.push_back(
      std::make_shared<ISO8601Parser>(timestamp(TimeUnit::SECOND)));

  for (auto _ : state) {
    readCSV(file_name, convert_options);
  }
  // One processed item corresponds to one full read of the file.
  state.SetItemsProcessed(state.iterations());
}
101+
102+
// Benchmark: read the generated CSV file with the default conversion options.
BENCHMARK_F(ReadCSVBenchmark, ConverterDefault)(benchmark::State& state) {
  // The options do not change between iterations; build them once, outside
  // the timed loop, so that only the CSV read itself is measured.
  const auto convert_options = csv::ConvertOptions::Defaults();

  for (auto _ : state) {
    readCSV(file_name, convert_options);
  }
  // One processed item corresponds to one full read of the file.
  state.SetItemsProcessed(state.iterations());
}
109+
32110
static std::shared_ptr<BlockParser> BuildInt64Data(int32_t num_rows) {
33111
const std::vector<std::string> base_rows = {"123\n", "4\n", "-317005557\n",
34112
"\n", "N/A\n", "0\n"};

cpp/src/arrow/util/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ add_arrow_benchmark(int_util_benchmark)
7676
add_arrow_benchmark(machine_benchmark)
7777
add_arrow_benchmark(number_parsing_benchmark)
7878
add_arrow_benchmark(range_benchmark)
79-
add_arrow_benchmark(read_csv_benchmark)
8079
add_arrow_benchmark(thread_pool_benchmark)
8180
add_arrow_benchmark(trie_benchmark)
8281
add_arrow_benchmark(utf8_util_benchmark)

cpp/src/arrow/util/read_csv_benchmark.cc

Lines changed: 0 additions & 108 deletions
This file was deleted.

0 commit comments

Comments
 (0)