Description
When performing subscript assignment of a std::string
to writable::strings
there's first a conversion to the r_string
class which includes a protection. However, that protection shouldn't be necessary if the result is immediately assigned and there is no translation.
Here's a benchmark compared to Rcpp, where I am generating random strings and assigning them to a pre-allocated vector.
microbenchmark(
rcpp = assign_rcpp(n=1e5, -1), # Assign random strings to Rcpp::CharacterVector
cpp11 = assign_cpp11(n=1e5, -1), # Assign random strings to cpp11::writable::strings
times = 20, setup = gc(full=TRUE))
Unit: milliseconds
expr min lq mean median uq max neval
rcpp 35.09063 36.53801 37.74236 37.3688 39.03084 41.29021 20
cpp11 145.24590 148.24958 154.64878 151.7362 158.87924 177.13784 20
(The performance difference depends a lot on platform, but there's a 4x difference on a mac laptop.)
One possibility is to add some sort of overload or specialization for std::string
for subscript assignment and push_back to short circuit the conversion to r_string
.
Here's an example for push_back
(https://github.com/traversc/cpp11/blob/main/inst/include/cpp11/strings.hpp#L133)
microbenchmark::microbenchmark(
grow_strings_cpp11 = grow_strings_cpp11(1e5, -1), # push_back 1e5 random strings, existing implementation
grow_strings_fast_cpp11 = grow_strings_fast_cpp11(1e5, -1), # push_back_fast implementation
grow_strings_manual = grow_strings_manual(1e5, -1), # manual re-implementation using only SEXP
times = 20, setup = gc(full=TRUE))
Unit: milliseconds
expr min lq mean median uq max neval
grow_strings_cpp11 148.06594 152.42953 158.78716 156.06482 163.46553 183.88168 20
grow_strings_fast_cpp11 38.22086 39.39721 40.47245 40.07249 41.43170 44.69328 20
grow_strings_manual 38.65436 40.02137 41.00465 41.10390 41.81826 46.18455 20
assign_rcpp / assign_cpp11 benchmark functions
#include <random>
std::random_device rd;
std::mt19937 gen(rd());
int random_int(int min, int max) {
std::uniform_int_distribution<int> dist(min, max);
return dist(gen);
}
std::string random_string() {
std::string s(10, '\0');
for (size_t i = 0; i < 10; i++) {
s[i] = random_int(0, 25) + 'a';
}
return s;
}
#include <Rcpp.h>
using namespace Rcpp;
# Rcpp
// [[Rcpp::export(rng=false)]]
CharacterVector assign_rcpp(size_t n, int seed) {
CharacterVector x(n);
if(seed != -1) gen.seed(seed);
for (size_t i = 0; i < n; ++i) {
x[i] = random_string();
}
return x;
}
#include "cpp11.hpp"
using namespace cpp11;
namespace writable = cpp11::writable;
# cpp11
[[cpp11::register]]
writable::strings assign_cpp11(size_t n, int seed) {
if(seed != -1) gen.seed(seed);
writable::strings x(n);
for (size_t i = 0; i < n; ++i) {
x[i] = random_string();
}
return x;
}
push_back benchmark functions
#include "cpp11.hpp"
#include <vector>
#include <random>
using namespace cpp11;
namespace writable = cpp11::writable;
std::random_device rd;
std::mt19937 gen(rd());
double random_double() {
std::uniform_real_distribution<double> dist(0.0, 1.0);
return dist(gen);
}
int random_int(int min, int max) {
std::uniform_int_distribution<int> dist(min, max);
return dist(gen);
}
std::string random_string() {
std::string s(10, '\0');
for (size_t i = 0; i < 10; i++) {
s[i] = random_int(0, 25) + 'a';
}
return s;
}
[[cpp11::register]]
writable::strings grow_strings_cpp11(size_t n, int seed) {
if(seed != -1) gen.seed(seed);
writable::strings x;
for (size_t i = 0; i < n; ++i) {
x.push_back(random_string());
}
return x;
}
[[cpp11::register]]
writable::strings grow_strings_fast_cpp11(size_t n, int seed) {
if(seed != -1) gen.seed(seed);
writable::strings x;
for (size_t i = 0; i < n; ++i) {
x.push_back_fast(random_string());
}
return x;
}
[[cpp11::register]]
SEXP grow_strings_manual(size_t n, int seed) {
if(seed != -1) gen.seed(seed);
SEXP data_ = PROTECT(Rf_allocVector(STRSXP, 0));
size_t size_ = 0;
size_t capacity_ = 0;
for (size_t i = 0; i < n; ++i) {
if(size_ == capacity_) {
capacity_ = capacity_ == 0 ? 1 : capacity_ * 2;
SEXP new_data_ = PROTECT(Rf_allocVector(STRSXP, capacity_));
for(size_t j = 0; j < size_; ++j) {
SET_STRING_ELT(new_data_, j, STRING_ELT(data_, j));
}
UNPROTECT(2);
data_ = PROTECT(new_data_);
}
SET_STRING_ELT(data_, size_++, Rf_mkChar(random_string().c_str()));
}
// copy back down to size
if(size_ < capacity_) {
SEXP new_data_ = PROTECT(Rf_allocVector(STRSXP, size_));
for(size_t j = 0; j < size_; ++j) {
SET_STRING_ELT(new_data_, j, STRING_ELT(data_, j));
}
UNPROTECT(2);
return new_data_;
} else {
UNPROTECT(1);
return data_;
}
}