Skip to content

allow extended character set in annotations #343

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Both methods are supported. However, for most users we _strongly_ recommend to b
- Boost.Container: header-only
- Boost.Test: header-only or (optionally) as a compiled library, *only used for unit testing*
- Boost.Range: header-only, *only used for unit testing*
- [BTAS](http://github.com/ValeevGroup/BTAS), tag db884b020b5c13c312c07df9d5c03cea2d65afb2 . If usable BTAS installation is not found, TiledArray will download and compile
- [BTAS](http://github.com/ValeevGroup/BTAS), tag 242871710dabd5ef337e5253000d3e38c1d977ba . If a usable BTAS installation is not found, TiledArray will download and compile
BTAS from source. *This is the recommended way to compile BTAS for all users*.
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness), tag 997e8b458c4234fb6c8c2781a5df59cb14b7e700 .
Only the MADworld runtime and BLAS/LAPACK C API components of MADNESS are used by TiledArray.
Expand Down
4 changes: 2 additions & 2 deletions external/versions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ set(TA_TRACKED_MADNESS_PREVIOUS_TAG fae8081179b9d074968b08e064a32e3ca07ab0f1)
set(TA_TRACKED_MADNESS_VERSION 0.10.1)
set(TA_TRACKED_MADNESS_PREVIOUS_VERSION 0.10.1)

set(TA_TRACKED_BTAS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2)
set(TA_TRACKED_BTAS_PREVIOUS_TAG 1dc111d3efd6d6a4ba6f3b8cbc239315d0392121)
set(TA_TRACKED_BTAS_TAG 242871710dabd5ef337e5253000d3e38c1d977ba)
set(TA_TRACKED_BTAS_PREVIOUS_TAG db884b020b5c13c312c07df9d5c03cea2d65afb2)

set(TA_TRACKED_CUTT_TAG 0e8685bf82910bc7435835f846e88f1b39f47f09)
set(TA_TRACKED_CUTT_PREVIOUS_TAG 592198b93c93b7ca79e7900b9a9f2e79f9dafec3)
Expand Down
2 changes: 1 addition & 1 deletion src/TiledArray/expressions/blk_tsr_expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ class BlkTsrExprBase : public Expr<Derived> {
BlkTsrExprBase(reference array, const std::string& annotation,
const PairRange& bounds)
: Expr_(), array_(array), annotation_(annotation) {
const auto rank = array.range().rank();
const auto rank = array.tiles_range().rank();
lower_bound_.reserve(rank);
upper_bound_.reserve(rank);
int d = 0;
Expand Down
18 changes: 12 additions & 6 deletions src/TiledArray/util/annotation.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,14 @@

#ifndef TILEDARRAY_ANNOTATION_H__INCLUDED
#define TILEDARRAY_ANNOTATION_H__INCLUDED

#include "TiledArray/error.h"

#include <algorithm>
#include <cstring>
#include <sstream>
#include <string>
#include "TiledArray/error.h"
#include <vector>

namespace TiledArray::detail {

Expand Down Expand Up @@ -96,10 +100,12 @@ inline auto tokenize_index(const std::string& s, char delim) {
/// TiledArray defines a string as being a valid index if each character is one
/// of the following:
///
/// - Roman letters A through Z (uppercase and lowercase are allowed)
/// - Base 10 numbers 0 through 9
/// - Whitespace
/// - underscore (`_`), prime (<code>'</code>), comma (`,`), or semicolon (`;`)
/// - Roman letters (`A..Z`, `a..z`)
/// - decimal digits (`0..9`)
/// - whitespace (` `)
/// - comma (`,`)
/// - semicolon (`;`)
/// - any of the following characters: ``'`_~!@#$%^&*-+./?:|<>[]{}()``
///
/// Additionally the string can not:
///
Expand All @@ -124,7 +130,7 @@ inline bool is_valid_index(const std::string& idx) {
"abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"1234567890"
" _',;";
",; '`_~!@#$%^&*-+./?:|<>[]{}()";
// Every character of the index must be one of the valid characters
for (const auto& c : idx)
if (valid_chars.find(c) == std::string::npos) return false;
Expand Down
118 changes: 55 additions & 63 deletions tests/annotation.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "unit_test_config.h"
#include <TiledArray/util/annotation.h>
#include <tiledarray.h>
#include "unit_test_config.h"
using namespace TiledArray::detail;

namespace {
Expand All @@ -14,10 +14,10 @@ corr_map j_idx{{"j", "j"}, {" j", "j"}, {"j ", "j"}};
corr_map k_idx{{"k", "k"}, {" k", "k"}, {"k ", "k"}};

auto combine_maps(const corr_map& lhs, const corr_map& rhs,
const std::string& joiner = ""){
const std::string& joiner = "") {
corr_map rv;
for(auto& [lidx, lcorr] : lhs){
for(auto& [ridx, rcorr] : rhs){
for (auto& [lidx, lcorr] : lhs) {
for (auto& [ridx, rcorr] : rhs) {
rv[lidx + joiner + ridx] = lcorr + joiner + rcorr;
}
}
Expand All @@ -31,7 +31,7 @@ auto i_j_idx = combine_maps(i_idx, j_idx, ",");
auto vov_idx = combine_maps(i_idx, j_idx, ";");
auto vom_idx = combine_maps(k_idx, i_j_idx, ";");
auto mov_idx = combine_maps(i_j_idx, k_idx, ";");
} // namespace
} // namespace

/* We need to remove whitespace from all types of indices: single character,
* multiple character, multiple mode, and tensor-of-tensor. This test suite
Expand All @@ -48,42 +48,36 @@ auto mov_idx = combine_maps(i_j_idx, k_idx, ";");
*/
BOOST_AUTO_TEST_SUITE(remove_whitespace_fxn)

BOOST_AUTO_TEST_CASE(single_character){
for(auto& [idx, corr] : i_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
BOOST_AUTO_TEST_CASE(single_character) {
for (auto& [idx, corr] : i_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(multicharacter){
BOOST_AUTO_TEST_CASE(multicharacter) {
auto ij_idx = combine_maps(i_idx, j_idx);

for(auto& [idx, corr] : ij_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
for (auto& [idx, corr] : ij_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(matrix_index){
for(auto& [idx, corr] : i_j_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
BOOST_AUTO_TEST_CASE(matrix_index) {
for (auto& [idx, corr] : i_j_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(tensor_index){
BOOST_AUTO_TEST_CASE(tensor_index) {
auto i_j_k_idx = combine_maps(i_j_idx, k_idx, ",");
for(auto& [idx, corr] : i_j_k_idx)
for (auto& [idx, corr] : i_j_k_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(vector_of_vector_index){
for(auto& [idx, corr] : vov_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
BOOST_AUTO_TEST_CASE(vector_of_vector_index) {
for (auto& [idx, corr] : vov_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(vector_of_matrix){
for(auto& [idx, corr] : vom_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
BOOST_AUTO_TEST_CASE(vector_of_matrix) {
for (auto& [idx, corr] : vom_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_CASE(matrix_of_vector){
for(auto& [idx, corr] : mov_idx)
BOOST_CHECK(remove_whitespace(idx) == corr);
BOOST_AUTO_TEST_CASE(matrix_of_vector) {
for (auto& [idx, corr] : mov_idx) BOOST_CHECK(remove_whitespace(idx) == corr);
}

BOOST_AUTO_TEST_SUITE_END()
Expand All @@ -95,7 +89,7 @@ BOOST_AUTO_TEST_SUITE_END()
*/
BOOST_AUTO_TEST_SUITE(tokenize_index_fxn)

BOOST_AUTO_TEST_CASE(split_on_comma){
BOOST_AUTO_TEST_CASE(split_on_comma) {
std::map<std::string, string_vector_t> input2corr{
{"", string_vector_t{""}},
{"hello world", string_vector_t{"hello world"}},
Expand All @@ -106,13 +100,12 @@ BOOST_AUTO_TEST_CASE(split_on_comma){
{"hello,world ", string_vector_t{"hello", "world "}},
{"hello world,", string_vector_t{"hello world", ""}},
{"hello world, ", string_vector_t{"hello world", " "}},
{"1,2,3", string_vector_t{"1", "2", "3"}}
};
for(auto& [str, corr] : input2corr)
{"1,2,3", string_vector_t{"1", "2", "3"}}};
for (auto& [str, corr] : input2corr)
BOOST_CHECK(tokenize_index(str, ',') == corr);
}

BOOST_AUTO_TEST_CASE(split_on_semicolon){
BOOST_AUTO_TEST_CASE(split_on_semicolon) {
std::map<std::string, string_vector_t> input2corr{
{"", string_vector_t{""}},
{"hello world", string_vector_t{"hello world"}},
Expand All @@ -123,9 +116,8 @@ BOOST_AUTO_TEST_CASE(split_on_semicolon){
{"hello;world ", string_vector_t{"hello", "world "}},
{"hello world;", string_vector_t{"hello world", ""}},
{"hello world;", string_vector_t{"hello world", " "}},
{"1;2;3", string_vector_t{"1", "2", "3"}}
};
for(auto& [str, corr] : input2corr)
{"1;2;3", string_vector_t{"1", "2", "3"}}};
for (auto& [str, corr] : input2corr)
BOOST_CHECK(tokenize_index(str, ';') == corr);
}

Expand All @@ -139,29 +131,34 @@ BOOST_AUTO_TEST_SUITE_END()
*/
BOOST_AUTO_TEST_SUITE(is_valid_index_fxn)

BOOST_AUTO_TEST_CASE(valid_indices){
for(auto idx_set : {i_idx, i_j_idx, vov_idx, vom_idx, mov_idx}){
for(auto& [idx, corr] : idx_set) {
BOOST_AUTO_TEST_CASE(valid_indices) {
for (auto idx_set : {i_idx, i_j_idx, vov_idx, vom_idx, mov_idx}) {
for (auto& [idx, corr] : idx_set) {
BOOST_CHECK(is_valid_index(idx));
}
}
// index names built from every valid letter and punctuation character
// (digits, whitespace, and parentheses are not exercised here)
BOOST_CHECK(
is_valid_index("abcdefghijklmnopqrstuvwxyz,ABCDEFGHIJKLMNOPQRSTUVWXYZ;'`_"
"~!@#$%^&*-+.,/?:|<>[]{}"));
}

BOOST_AUTO_TEST_CASE(unallowed_character){
BOOST_CHECK(is_valid_index("i,&,j") == false);
BOOST_AUTO_TEST_CASE(unallowed_character) {
BOOST_CHECK(is_valid_index("i,\",j") == false);
BOOST_CHECK(is_valid_index("i,\\,j") == false);
}

BOOST_AUTO_TEST_CASE(multiple_semicolons){
BOOST_AUTO_TEST_CASE(multiple_semicolons) {
BOOST_CHECK(is_valid_index("i;j;k") == false);
}

BOOST_AUTO_TEST_CASE(only_whitespace){
BOOST_AUTO_TEST_CASE(only_whitespace) {
BOOST_CHECK(is_valid_index("") == false);
BOOST_CHECK(is_valid_index(" ") == false);
BOOST_CHECK(is_valid_index(" ") == false);
}

BOOST_AUTO_TEST_CASE(empty_index_name){
BOOST_AUTO_TEST_CASE(empty_index_name) {
BOOST_CHECK(is_valid_index("i,") == false);
BOOST_CHECK(is_valid_index(",i") == false);
BOOST_CHECK(is_valid_index("i,,j") == false);
Expand All @@ -182,19 +179,17 @@ BOOST_AUTO_TEST_SUITE_END()
*/
BOOST_AUTO_TEST_SUITE(is_tot_index_fxn)

BOOST_AUTO_TEST_CASE(valid_but_not_tot){
for(auto x : {i_idx, i_j_idx})
for(auto idx : x)
BOOST_CHECK(is_tot_index(idx.first) == false);
BOOST_AUTO_TEST_CASE(valid_but_not_tot) {
for (auto x : {i_idx, i_j_idx})
for (auto idx : x) BOOST_CHECK(is_tot_index(idx.first) == false);
}

BOOST_AUTO_TEST_CASE(valid_tot_idx){
for(auto x : {vov_idx, vom_idx, mov_idx})
for(auto idx : x)
BOOST_CHECK(is_tot_index(idx.first));
BOOST_AUTO_TEST_CASE(valid_tot_idx) {
for (auto x : {vov_idx, vom_idx, mov_idx})
for (auto idx : x) BOOST_CHECK(is_tot_index(idx.first));
}

BOOST_AUTO_TEST_CASE(not_valid_idx){
BOOST_AUTO_TEST_CASE(not_valid_idx) {
BOOST_CHECK(is_tot_index("") == false);
BOOST_CHECK(is_tot_index(";") == false);
}
Expand All @@ -208,32 +203,29 @@ BOOST_AUTO_TEST_SUITE_END()
*/
BOOST_AUTO_TEST_SUITE(split_index_fxn)

BOOST_AUTO_TEST_CASE(invalid_idx){
if(TiledArray::get_default_world().nproc() == 1)
BOOST_AUTO_TEST_CASE(invalid_idx) {
if (TiledArray::get_default_world().nproc() == 1)
BOOST_CHECK_THROW(split_index("i,"), TiledArray::Exception);
}

BOOST_AUTO_TEST_CASE(non_tot){
BOOST_AUTO_TEST_CASE(non_tot) {
std::map<std::string, std::pair<string_vector_t, string_vector_t>> inputs{
{"i", {string_vector_t{"i"}, string_vector_t{}}},
{"i,j", {string_vector_t{"i","j"}, string_vector_t{}}},
{"i,j,k", {string_vector_t{"i", "j", "k"}, string_vector_t{}}}
};
{"i,j", {string_vector_t{"i", "j"}, string_vector_t{}}},
{"i,j,k", {string_vector_t{"i", "j", "k"}, string_vector_t{}}}};

for(auto& [idx, corr] : inputs) {
for (auto& [idx, corr] : inputs) {
BOOST_CHECK(split_index(idx) == corr);
}
}

BOOST_AUTO_TEST_CASE(tot){
BOOST_AUTO_TEST_CASE(tot) {
std::map<std::string, std::pair<string_vector_t, string_vector_t>> inputs{
{"i;j", {string_vector_t{"i"}, string_vector_t{"j"}}},
{"i,j;k", {string_vector_t{"i","j"}, string_vector_t{"k"}}},
{"i;j,k", {string_vector_t{"i"}, string_vector_t{"j", "k"}}}
};
{"i,j;k", {string_vector_t{"i", "j"}, string_vector_t{"k"}}},
{"i;j,k", {string_vector_t{"i"}, string_vector_t{"j", "k"}}}};

for(auto& [idx, corr] : inputs)
BOOST_CHECK(split_index(idx) == corr);
for (auto& [idx, corr] : inputs) BOOST_CHECK(split_index(idx) == corr);
}

BOOST_AUTO_TEST_SUITE_END()
Loading