Skip to content

Commit 1c0814a

Browse files
prutskov authored and pitrou committed
Add benchmark for timestamp type converters (#5)
1 parent 48d4e55 commit 1c0814a

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

cpp/src/arrow/util/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ add_arrow_benchmark(int_util_benchmark)
7676
add_arrow_benchmark(machine_benchmark)
7777
add_arrow_benchmark(number_parsing_benchmark)
7878
add_arrow_benchmark(range_benchmark)
79+
add_arrow_benchmark(read_csv_benchmark)
7980
add_arrow_benchmark(thread_pool_benchmark)
8081
add_arrow_benchmark(trie_benchmark)
8182
add_arrow_benchmark(utf8_util_benchmark)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include <arrow/io/file.h>
19+
20+
#include <fstream>
21+
#include <string>
22+
#include <vector>
23+
24+
#include "arrow/csv/options.h"
25+
#include "arrow/csv/reader.h"
26+
#include "arrow/util/parsing.h"
27+
#include "arrow/util/timestamp_converter.h"
28+
#include "benchmark/benchmark.h"
29+
30+
namespace arrow {
31+
32+
const std::vector<std::string> formats = {"1917-10-17,", "2018-09-13 22,",
33+
"1941-06-22 04:00,", "1945-05-09 09:45:38,"};
34+
const std::string file_name = "__data.csv";
35+
36+
const int32_t n_rows = 100000;
37+
const int32_t n_cols = 150;
38+
39+
class ReadCSVBenchmark : public benchmark::Fixture {
40+
public:
41+
void SetUp(const ::benchmark::State& state) {
42+
generateCSV(file_name, formats, n_cols, n_rows);
43+
}
44+
45+
void TearDown(const ::benchmark::State& state) {
46+
std::remove(file_name.c_str());
47+
}
48+
49+
private:
50+
void generateCSV(const std::string& path_csv, const std::vector<std::string>& dates,
51+
int32_t cols, int32_t rows) {
52+
::srand(777);
53+
std::ofstream file;
54+
file.open(path_csv, std::ios::out);
55+
56+
for (int32_t row = 0; row < rows; ++row) {
57+
for (int32_t col = 0; col < cols; ++col) {
58+
file << dates[rand() % dates.size()];
59+
}
60+
file << "\n";
61+
}
62+
file.close();
63+
}
64+
};
65+
66+
static void readCSV(const std::string& path_csv, const csv::ConvertOptions& convert_opt) {
67+
arrow::Status st;
68+
auto memory_pool = default_memory_pool();
69+
70+
std::shared_ptr<io::ReadableFile> input;
71+
auto file_result = io::ReadableFile::Open(path_csv.c_str());
72+
st = file_result.status();
73+
input = file_result.ValueOrDie();
74+
75+
auto read_opt = csv::ReadOptions::Defaults();
76+
auto parse_opt = csv::ParseOptions::Defaults();
77+
78+
auto table_reader_result =
79+
csv::TableReader::Make(memory_pool, input, read_opt, parse_opt, convert_opt);
80+
st = table_reader_result.status();
81+
auto table_reader = table_reader_result.ValueOrDie();
82+
83+
std::shared_ptr<Table> arrowTable;
84+
85+
auto arrow_table_result = table_reader->Read();
86+
st = arrow_table_result.status();
87+
arrowTable = arrow_table_result.ValueOrDie();
88+
}
89+
90+
BENCHMARK_F(ReadCSVBenchmark, ConverterISO)(benchmark::State& state) {
91+
for (auto _ : state) {
92+
auto convert_options = csv::ConvertOptions::Defaults();
93+
convert_options.timestamp_converters.push_back(
94+
std::make_shared<ISO8601Parser>(timestamp(TimeUnit::SECOND)));
95+
readCSV(file_name, convert_options);
96+
}
97+
state.SetItemsProcessed(state.iterations());
98+
}
99+
100+
BENCHMARK_F(ReadCSVBenchmark, ConverterDefault)(benchmark::State& state) {
101+
for (auto _ : state) {
102+
auto convert_options = csv::ConvertOptions::Defaults();
103+
readCSV(file_name, convert_options);
104+
}
105+
state.SetItemsProcessed(state.iterations());
106+
}
107+
108+
} // namespace arrow

0 commit comments

Comments
 (0)