|
5 | 5 | #include <algorithm> // for max
|
6 | 6 | #include <array> // for array
|
7 | 7 | #include <cstdio> // for snprintf
|
| 8 | +#include <cstring> // for memcpy |
8 | 9 | #include <exception> // for exception
|
9 | 10 | #include <initializer_list> // for initializer_list
|
10 | 11 | #include <iterator> // for forward_iterator_tag, random_ac...
|
@@ -145,6 +146,8 @@ class r_vector {
|
145 | 146 | /// Implemented in specialization
|
146 | 147 | static underlying_type* get_p(bool is_altrep, SEXP data);
|
147 | 148 | /// Implemented in specialization
|
| 149 | + static underlying_type const* get_const_p(bool is_altrep, SEXP data); |
| 150 | + /// Implemented in specialization |
148 | 151 | static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf);
|
149 | 152 | /// Implemented in specialization
|
150 | 153 | static SEXPTYPE get_sexptype();
|
@@ -311,8 +314,13 @@ class r_vector : public cpp11::r_vector<T> {
|
311 | 314 | /// Implemented in specialization
|
312 | 315 | static void set_elt(SEXP x, R_xlen_t i, underlying_type value);
|
313 | 316 |
|
| 317 | + static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size); |
| 318 | + static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size); |
| 319 | + static SEXP resize_names(SEXP x, R_xlen_t size); |
| 320 | + |
314 | 321 | using cpp11::r_vector<T>::get_elt;
|
315 | 322 | using cpp11::r_vector<T>::get_p;
|
| 323 | + using cpp11::r_vector<T>::get_const_p; |
316 | 324 | using cpp11::r_vector<T>::get_sexptype;
|
317 | 325 | using cpp11::r_vector<T>::valid_type;
|
318 | 326 | using cpp11::r_vector<T>::valid_length;
|
@@ -759,8 +767,25 @@ inline r_vector<T>::r_vector(SEXP&& data, bool is_altrep)
|
759 | 767 | : cpp11::r_vector<T>(data, is_altrep), capacity_(length_) {}
|
760 | 768 |
|
761 | 769 | template <typename T>
|
762 |
| -inline r_vector<T>::r_vector(const r_vector& rhs) |
763 |
| - : cpp11::r_vector<T>(safe[Rf_shallow_duplicate](rhs)), capacity_(rhs.capacity_) {} |
| 770 | +inline r_vector<T>::r_vector(const r_vector& rhs) { |
| 771 | + // We don't want to just pass through to the read-only constructor because we'd |
| 772 | + // have to convert to `SEXP` first, which could truncate, and then we'd still have |
| 773 | + // to shallow duplicate after that to ensure we have a duplicate, which can result in |
| 774 | + // too many copies (#369). |
| 775 | + // |
| 776 | + // Instead we take control of setting all fields to try and only duplicate 1 time. |
| 777 | + // We try and reclaim unused capacity during the duplication by only reserving up to |
| 778 | + // the `rhs.length_`. This is nice because if the user returns this object, the |
| 779 | + // truncation has already been done and they don't have to pay for another allocation. |
| 780 | + // Importantly, `reserve_data()` always duplicates even if there wasn't extra capacity, |
| 781 | + // which ensures we have our own copy. |
| 782 | + data_ = reserve_data(rhs.data_, rhs.is_altrep_, rhs.length_); |
| 783 | + protect_ = detail::store::insert(data_); |
| 784 | + is_altrep_ = ALTREP(data_); |
| 785 | + data_p_ = get_p(is_altrep_, data_); |
| 786 | + length_ = rhs.length_; |
| 787 | + capacity_ = rhs.length_; |
| 788 | +} |
764 | 789 |
|
765 | 790 | template <typename T>
|
766 | 791 | inline r_vector<T>::r_vector(r_vector&& rhs) {
|
@@ -1048,7 +1073,7 @@ inline void r_vector<T>::reserve(R_xlen_t new_capacity) {
|
1048 | 1073 | SEXP old_protect = protect_;
|
1049 | 1074 |
|
1050 | 1075 | data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity)
|
1051 |
| - : safe[Rf_xlengthgets](data_, new_capacity); |
| 1076 | + : reserve_data(data_, is_altrep_, new_capacity); |
1052 | 1077 | protect_ = detail::store::insert(data_);
|
1053 | 1078 | is_altrep_ = ALTREP(data_);
|
1054 | 1079 | data_p_ = get_p(is_altrep_, data_);
|
@@ -1249,6 +1274,83 @@ inline typename r_vector<T>::iterator r_vector<T>::iterator::operator+(R_xlen_t
|
1249 | 1274 | return it;
|
1250 | 1275 | }
|
1251 | 1276 |
|
| 1277 | +// Compared to `Rf_xlengthgets()`: |
| 1278 | +// - This always allocates, even if it is the same size, which is important when we use |
| 1279 | +// it in a constructor and need to ensure that it duplicates on the way in. |
| 1280 | +// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we |
| 1281 | +// use it in constructors and when we truncate right before returning from the `SEXP` |
| 1282 | +// operator. |
| 1283 | +// - This is more friendly to ALTREP `x`. |
| 1284 | +template <typename T> |
| 1285 | +inline SEXP r_vector<T>::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { |
| 1286 | + // Resize core data |
| 1287 | + SEXP out = PROTECT(resize_data(x, is_altrep, size)); |
| 1288 | + |
| 1289 | + // Resize names, if required |
| 1290 | + SEXP names = Rf_getAttrib(x, R_NamesSymbol); |
| 1291 | + if (names != R_NilValue) { |
| 1292 | + names = resize_names(names, size); |
| 1293 | + Rf_setAttrib(out, R_NamesSymbol, names); |
| 1294 | + } |
| 1295 | + |
| 1296 | + // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. |
| 1297 | + // Does not copy over names, dim, or dim names. |
| 1298 | + // Names are handled already. Dim and dim names should not be applicable, |
| 1299 | + // as this is a vector. |
| 1300 | + // Does not look like it would ever error in our use cases, so no `safe[]`. |
| 1301 | + Rf_copyMostAttrib(x, out); |
| 1302 | + |
| 1303 | + UNPROTECT(1); |
| 1304 | + return out; |
| 1305 | +} |
| 1306 | + |
| 1307 | +template <typename T> |
| 1308 | +inline SEXP r_vector<T>::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { |
| 1309 | + underlying_type const* v_x = get_const_p(is_altrep, x); |
| 1310 | + |
| 1311 | + SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); |
| 1312 | + underlying_type* v_out = get_p(ALTREP(out), out); |
| 1313 | + |
| 1314 | + const R_xlen_t x_size = Rf_xlength(x); |
| 1315 | + const R_xlen_t copy_size = (x_size > size) ? size : x_size; |
| 1316 | + |
| 1317 | + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly |
| 1318 | + // copy everything from `x`) |
| 1319 | + if (v_x != nullptr && v_out != nullptr) { |
| 1320 | + std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); |
| 1321 | + } else { |
| 1322 | + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP |
| 1323 | + for (R_xlen_t i = 0; i < copy_size; ++i) { |
| 1324 | + set_elt(out, i, get_elt(x, i)); |
| 1325 | + } |
| 1326 | + } |
| 1327 | + |
| 1328 | + UNPROTECT(1); |
| 1329 | + return out; |
| 1330 | +} |
| 1331 | + |
| 1332 | +template <typename T> |
| 1333 | +inline SEXP r_vector<T>::resize_names(SEXP x, R_xlen_t size) { |
| 1334 | + const SEXP* v_x = STRING_PTR_RO(x); |
| 1335 | + |
| 1336 | + SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); |
| 1337 | + |
| 1338 | + const R_xlen_t x_size = Rf_xlength(x); |
| 1339 | + const R_xlen_t copy_size = (x_size > size) ? size : x_size; |
| 1340 | + |
| 1341 | + for (R_xlen_t i = 0; i < copy_size; ++i) { |
| 1342 | + SET_STRING_ELT(out, i, v_x[i]); |
| 1343 | + } |
| 1344 | + |
| 1345 | + // Ensure remaining names are initialized to `""` |
| 1346 | + for (R_xlen_t i = copy_size; i < size; ++i) { |
| 1347 | + SET_STRING_ELT(out, i, R_BlankString); |
| 1348 | + } |
| 1349 | + |
| 1350 | + UNPROTECT(1); |
| 1351 | + return out; |
| 1352 | +} |
| 1353 | + |
1252 | 1354 | } // namespace writable
|
1253 | 1355 |
|
1254 | 1356 | // TODO: is there a better condition we could use, e.g. assert something true
|
|
0 commit comments