Skip to content

Commit 706b74d

Browse files
author
Aperjump
committed
Finish inner join function
1 parent 1c10b61 commit 706b74d

File tree

2 files changed

+38
-19
lines changed

2 files changed

+38
-19
lines changed

include/boost/numeric/ublas/data_frame.hpp

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <list>
77
#include <string>
88
#include <unordered_map>
9+
#include <map>
910
#include <vector>
1011
#include <initializer_list>
1112
#include <tuple>
@@ -260,12 +261,6 @@ class data_frame {
260261
void for_each_in_tuple(std::tuple<Ts...> const& t, F f, const std::vector<std::string>& names) {
261262
for_each(t, f, std::index_sequence_for<Ts...>{}, names);
262263
}
263-
264-
// auto get_tuple_types() {
265-
// for (const auto& p: type_map) {
266-
267-
// }
268-
// }
269264
int cur_rows;
270265
store_t vals;
271266
/* col_names_map and type_map must be kept consistent with each other */
@@ -416,7 +411,7 @@ template<typename T,
416411
typename... Types2,
417412
template<class...> class TypeLists1, typename... InnerTypes1,
418413
template<class...> class TypeLists2, typename... InnerTypes2>
419-
auto combine(const data_frame<Types1...>& l, const data_frame<Types2...>& r,
414+
auto combine_inner(const data_frame<Types1...>& l, const data_frame<Types2...>& r,
420415
const std::string& col_name,
421416
TypeLists1<InnerTypes1...>, const std::vector<std::string>& colnamesl,
422417
TypeLists2<InnerTypes2...>, const std::vector<std::string>& colnamesr) {
@@ -429,8 +424,8 @@ auto combine(const data_frame<Types1...>& l, const data_frame<Types2...>& r,
429424
auto merge_type_collection = merge_types(type_collection_l{}, type_collection_r{});
430425
int llen = l.get_cur_rows();
431426
int rlen = r.get_cur_rows();
432-
std::unordered_multimap<T, size_t> valueTopos1;
433-
std::unordered_multimap<T, size_t> valueTopos2;
427+
std::multimap<T, size_t> valueTopos1;
428+
std::multimap<T, size_t> valueTopos2;
434429
// Must iterate twice to get the number of rows in result data frame
435430
for (int i = 0; i < llen; i++)
436431
valueTopos1.insert({l.data_frame<Types1...>::template get_c<T>(col_name, i), i});
@@ -439,16 +434,38 @@ auto combine(const data_frame<Types1...>& l, const data_frame<Types2...>& r,
439434
if (valueTopos1.count(val))
440435
valueTopos2.insert({val, j});
441436
}
442-
int rows = valueTopos2.size();
443-
auto* new_df = new data_frame(rows, merge_type_collection);
444-
using new_df_type = typename std::remove_pointer<decltype(new_df)>::type;
445-
// init columns
446-
new_df->init_columns(std::tuple<InnerTypes1...>{}, colnamesl, rows);
447-
new_df->init_columns(std::tuple<InnerTypes2...>{}, colnamesr, rows);
448-
// need to iterate through one the
449-
std::cout << "New df rows: " << new_df->get_cur_rows() << std::endl;
437+
// get the concatenated tuple type and column names
438+
auto tuple_cat_val = std::tuple_cat(std::tuple<InnerTypes1...>{}, std::tuple<InnerTypes2...>{});
439+
std::vector<std::string> col_names;
440+
for (const auto& l_name: colnamesl) { col_names.push_back(l_name); }
441+
for (const auto& r_name: colnamesr) { col_names.push_back(r_name); }
442+
// need to iterate through valueTopos2 to get common data
443+
std::vector<decltype(tuple_cat_val)> new_tuple_vec;
444+
for (const auto& iter: valueTopos2) {
445+
T key = iter.first;
446+
size_t pos = iter.second;
447+
std::tuple<InnerTypes2...> right_tuple = for_each_in_tuple(std::tuple<InnerTypes2...>{}, &r, colnamesr, pos);
448+
auto tmp_range = valueTopos1.equal_range(key);
449+
for (auto i = tmp_range.first; i != tmp_range.second; ++i) {
450+
size_t l_pos = i->second;
451+
std::tuple<InnerTypes1...> left_tuple = for_each_in_tuple(std::tuple<InnerTypes1...>{}, &l, colnamesl, l_pos);
452+
auto combined_tuple = std::tuple_cat(left_tuple, right_tuple);
453+
new_tuple_vec.push_back(combined_tuple);
454+
}
455+
}
456+
auto new_df = make_from_tuples(new_tuple_vec, col_names, tuple_cat_val);
450457
return new_df;
451458
}
459+
template<typename... Ts, typename... Types, std::size_t ... Is>
460+
auto for_each(const std::tuple<Ts...>& t, const data_frame<Types...>* df,
461+
const std::vector<std::string>& names, size_t pos, std::index_sequence<Is...>) {
462+
return std::make_tuple(df->data_frame<Types...>::template get_c<Ts>(names[Is], pos)...);
463+
}
464+
template<typename... Types, typename... Ts>
465+
auto for_each_in_tuple(const std::tuple<Ts...>& t, const data_frame<Types...>* df,
466+
const std::vector<std::string>& names, size_t pos) {
467+
return for_each(t, df, names, pos, std::index_sequence_for<Ts...>{});
468+
}
452469
template<class... Types>
453470
class data_frame_view {
454471
public:

test/data_frame_test.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,10 @@ TEST(Data_frame, combine_data_frames) {
181181
std::make_tuple(2.2, "world"s, 40),
182182
std::make_tuple(1.1, "bili"s, 50)},
183183
{"double_vec", "str_vec", "int_vec"});
184-
auto df3 = combine<double>(df1, df2, "double_vec", std::tuple<double, long>{}, {"double_vec", "long_vec"},
185-
std::tuple<double, std::string, int>{}, {"double_vec", "str_vec", "int_vec"});
184+
auto df3 = combine_inner<double>(df1, df2, "double_vec",
185+
std::tuple<double, long>{}, {"double_vec", "long_vec"},
186+
std::tuple<double, std::string, int>{}, {"double_vec", "str_vec", "int_vec"});
186187
EXPECT_EQ(df3->get_cur_rows(), 4);
187188
EXPECT_EQ(df3->get_cur_cols(), 4);
189+
df3->print_with_index({0, 1, 2, 3});
188190
}

0 commit comments

Comments
 (0)